Replace RE2 import with a dependency

TBR=armansito@chromium.org,reillyg@chromium.org,piman@chromium.org
BUG=568119

Review URL: https://codereview.chromium.org/1544433002

Cr-Commit-Position: refs/heads/master@{#366412}

This commit is contained in:

battre

2015-12-21 09:45:27 -08:00

committed by

Commit bot

parent e8e3eaa1b2

commit 0739c642c9

160 changed files with 135 additions and 47064 deletions

.gitignore DEPS

chrome/browser

android

data_usage

data_use_matcher.cc

chromeos

hwid_checker.cc

policy

system_log_uploader.cc

download

download_query.cc

extensions

activity_log

activity_log.cc

password_manager

chrome_password_manager_client.cc

local_files_ntp_source.cc

webui

extensions

extension_loader_handler.cc

components

autofill

content

renderer

password_form_conversion_utils.cc

core

browser

form_structure.cc

dom_distiller

core

page_features.cc

drive

drive_api_util.cc

json_schema

json_schema_validator.cc

keyed_service

core

dependency_graph_unittest.cc

password_manager

core

browser

import

csv_reader.cc

plugins

renderer

plugin_placeholder.cc

policy

core

common

schema.cc

url_matcher

regex_set_matcher.cc url_matcher_factory.cc

webcrypto

algorithms

test_helpers.cc

content/shell/android/linker_test_apk

chromium_linker_test_linker_tests.cc

device/serial

serial_device_enumerator_win.cc serial_io_handler_win.cc

extensions

browser

api

declarative_webrequest

webrequest_action.cc

web_request

form_data_parser.cc

common

image_util.cc stack_frame.cc

gpu

command_buffer

service

program_manager.cc

config

gpu_control_list.cc gpu_info_collector_win.cc

third_party

leveldatabase

env_chromium.cc

re2

.gitignore AUTHORS BUILD BUILD.gn CMakeLists.txt CONTRIBUTING.md CONTRIBUTORS Makefile README WORKSPACE

benchlog

benchlog.c2 benchlog.mini benchlog.r70 benchlog.wreck benchplot.py mktable

doc

README.xkcd mksyntaxgo mksyntaxhtml mksyntaxwiki syntax.html syntax.txt xkcd.png

lib

git

commit-msg.hook

libre2.symbols libre2.symbols.darwin re2.gyp re2.pc

re2

bitstate.cc compile.cc dfa.cc filtered_re2.cc filtered_re2.h make_perl_groups.pl make_unicode_casefold.py make_unicode_groups.py mimics_pcre.cc nfa.cc onepass.cc parse.cc perl_groups.cc prefilter.cc prefilter.h prefilter_tree.cc prefilter_tree.h prog.cc prog.h re2.cc re2.h regexp.cc regexp.h set.cc set.h simplify.cc stringpiece.cc stringpiece.h

testing

tostring.cc unicode.py unicode_casefold.cc unicode_casefold.h unicode_groups.cc unicode_groups.h variadic_function.h walker-inl.h

re2_test.bzl runtests testinstall.cc ucs2.diff

util

tools/ipc_fuzzer/message_tools

message_util.cc

1

.gitignore vendored

 @ -388,6 +388,7 @@ vs-chromium-project.txt
 /third_party/python_26
 /third_party/pywebsocket/src
 /third_party/pywebsocket/src
 /third_party/re2/src
 /third_party/requests/src
 /third_party/robolectric/lib
 /third_party/safe_browsing/testing

3

DEPS

 @ -275,6 +275,9 @@ deps = {
   'src/third_party/openh264/src':
     Var('chromium_git') + '/external/github.com/cisco/openh264' + '@' + 'b37cda248234162033e3e11b0335f3131cdfe488',
   'src/third_party/re2/src':
     Var('chromium_git') + '/external/github.com/google/re2.git' + '@' + 'dba3349aba83b5588e85e5ecf2b56c97f2d259b7',
 }

									
										2

chrome/browser/android/data_usage/data_use_matcher.cc
									
				@ -11,7 +11,7 @@

				#include "base/time/default_tick_clock.h"

				#include "base/time/tick_clock.h"

				#include "chrome/browser/android/data_usage/external_data_use_observer.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				#include "url/gurl.h"

				namespace chrome {

									
										2

chrome/browser/chromeos/login/hwid_checker.cc
									
				@ -14,7 +14,7 @@

				#include "chromeos/chromeos_switches.h"

				#include "chromeos/system/statistics_provider.h"

				#include "content/public/common/content_switches.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				#include "third_party/zlib/zlib.h"

				namespace {

									
										2

chrome/browser/chromeos/policy/system_log_uploader.cc
									
				@ -22,7 +22,7 @@

				#include "components/policy/core/common/cloud/enterprise_metrics.h"

				#include "content/public/browser/browser_thread.h"

				#include "net/http/http_request_headers.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace {

				// The maximum number of successive retries.

									
										2

chrome/browser/download/download_query.cc
									
				@ -31,7 +31,7 @@

				#include "components/url_formatter/url_formatter.h"

				#include "content/public/browser/content_browser_client.h"

				#include "content/public/browser/download_item.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				#include "url/gurl.h"

				using content::DownloadDangerType;

									
										2

chrome/browser/extensions/activity_log/activity_log.cc
									
				@ -36,7 +36,7 @@

				#include "extensions/browser/extensions_browser_client.h"

				#include "extensions/common/extension.h"

				#include "extensions/common/one_shot_event.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				#include "url/gurl.h"

				namespace constants = activity_log_constants;

									
										2

chrome/browser/password_manager/chrome_password_manager_client.cc
									
				@ -53,7 +53,7 @@

				#include "content/public/browser/web_contents.h"

				#include "google_apis/gaia/gaia_urls.h"

				#include "net/base/url_util.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				#if defined(OS_MACOSX) || BUILDFLAG(ANDROID_JAVA_UI)

				#include "chrome/browser/password_manager/save_password_infobar_delegate.h"

									
										4

chrome/browser/search/local_files_ntp_source.cc
									
				@ -18,8 +18,8 @@

				#include "chrome/common/url_constants.h"

				#include "content/public/browser/browser_thread.h"

				#include "content/public/browser/url_data_source.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/re2/stringpiece.h"

				#include "third_party/re2/src/re2/re2.h"

				#include "third_party/re2/src/re2/stringpiece.h"

				namespace {

									
										2

chrome/browser/ui/webui/extensions/extension_loader_handler.cc
									
				@ -25,7 +25,7 @@

				#include "extensions/common/constants.h"

				#include "extensions/common/extension.h"

				#include "extensions/common/manifest_constants.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				#include "ui/base/l10n/l10n_util.h"

				namespace extensions {

									
										2

components/autofill/content/renderer/password_form_conversion_utils.cc
									
				@ -22,7 +22,7 @@

				#include "third_party/WebKit/public/web/WebFormControlElement.h"

				#include "third_party/WebKit/public/web/WebFrame.h"

				#include "third_party/WebKit/public/web/WebInputElement.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				using blink::WebDocument;

				using blink::WebFormControlElement;

									
										2

components/autofill/core/browser/form_structure.cc
									
				@ -34,7 +34,7 @@

				#include "components/rappor/rappor_service.h"

				#include "components/rappor/rappor_utils.h"

				#include "third_party/libxml/chromium/libxml_utils.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace autofill {

				namespace {

									
										2

components/dom_distiller/core/page_features.cc
									
				@ -7,7 +7,7 @@

				#include <string>

				#include "base/json/json_reader.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				#include "url/gurl.h"

				namespace dom_distiller {

									
										2

components/drive/drive_api_util.cc
									
				@ -18,7 +18,7 @@

				#include "google_apis/drive/drive_api_parser.h"

				#include "net/base/escape.h"

				#include "net/base/net_errors.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				#include "url/gurl.h"

				namespace drive {

									
										2

components/json_schema/json_schema_validator.cc
									
				@ -17,7 +17,7 @@

				#include "base/strings/stringprintf.h"

				#include "base/values.h"

				#include "components/json_schema/json_schema_constants.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace schema = json_schema_constants;

									
										2

components/keyed_service/core/dependency_graph_unittest.cc
									
				@ -8,7 +8,7 @@

				#include "components/keyed_service/core/dependency_graph.h"

				#include "components/keyed_service/core/dependency_node.h"

				#include "testing/gtest/include/gtest/gtest.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace {

									
										2

components/password_manager/core/browser/import/csv_reader.cc
									
				@ -6,7 +6,7 @@

				#include "base/logging.h"

				#include "base/strings/string_util.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace {

									
										2

components/plugins/renderer/plugin_placeholder.cc
									
				@ -11,7 +11,7 @@

				#include "gin/object_template_builder.h"

				#include "third_party/WebKit/public/web/WebElement.h"

				#include "third_party/WebKit/public/web/WebPluginContainer.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace plugins {

									
										2

components/policy/core/common/schema.cc
									
				@ -17,7 +17,7 @@

				#include "components/json_schema/json_schema_constants.h"

				#include "components/json_schema/json_schema_validator.h"

				#include "components/policy/core/common/schema_internal.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace schema = json_schema_constants;

									
										4

components/url_matcher/regex_set_matcher.cc
									
				@ -8,8 +8,8 @@

				#include "base/stl_util.h"

				#include "base/strings/string_util.h"

				#include "components/url_matcher/substring_set_matcher.h"

				#include "third_party/re2/re2/filtered_re2.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/filtered_re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace url_matcher {

									
										2

components/url_matcher/url_matcher_factory.cc
									
				@ -13,7 +13,7 @@

				#include "base/values.h"

				#include "components/url_matcher/url_matcher_constants.h"

				#include "components/url_matcher/url_matcher_helpers.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace url_matcher {

									
										2

components/webcrypto/algorithms/test_helpers.cc
									
				@ -23,7 +23,7 @@

				#include "components/webcrypto/status.h"

				#include "third_party/WebKit/public/platform/WebCryptoAlgorithmParams.h"

				#include "third_party/WebKit/public/platform/WebCryptoKeyAlgorithm.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace webcrypto {

									
										2

content/shell/android/linker_test_apk/chromium_linker_test_linker_tests.cc
									
				@ -20,7 +20,7 @@

				#include "base/logging.h"

				#include "base/strings/stringprintf.h"

				#include "jni/LinkerTests_jni.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace content {

									
										2

device/serial/serial_device_enumerator_win.cc
									
				@ -16,7 +16,7 @@

				#include "base/strings/stringprintf.h"

				#include "base/strings/utf_string_conversions.h"

				#include "base/win/registry.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace device {

									
										2

device/serial/serial_io_handler_win.cc
									
				@ -12,7 +12,7 @@

				#include "base/threading/thread_checker.h"

				#include "device/core/device_info_query_win.h"

				#include "device/core/device_monitor_win.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace device {

									
										2

extensions/browser/api/declarative_webrequest/webrequest_action.cc
									
				@ -28,7 +28,7 @@

				#include "net/base/registry_controlled_domains/registry_controlled_domain.h"

				#include "net/http/http_util.h"

				#include "net/url_request/url_request.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				using content::ResourceRequestInfo;

									
										2

extensions/browser/api/web_request/form_data_parser.cc
									
				@ -13,7 +13,7 @@

				#include "base/values.h"

				#include "net/base/escape.h"

				#include "net/url_request/url_request.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				using base::DictionaryValue;

				using base::ListValue;

									
										2

extensions/common/image_util.cc
									
				@ -10,7 +10,7 @@

				#include "base/strings/string_number_conversions.h"

				#include "base/strings/string_util.h"

				#include "base/strings/stringprintf.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				#include "third_party/skia/include/core/SkColor.h"

				#include "ui/gfx/color_utils.h"

									
										2

extensions/common/stack_frame.cc
									
				@ -7,7 +7,7 @@

				#include <string>

				#include "base/strings/utf_string_conversions.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace extensions {

									
										2

gpu/command_buffer/service/program_manager.cc
									
				@ -29,7 +29,7 @@

				#include "gpu/command_buffer/service/gpu_switches.h"

				#include "gpu/command_buffer/service/program_cache.h"

				#include "gpu/command_buffer/service/shader_manager.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				#include "ui/gl/gl_version_info.h"

				using base::TimeDelta;

									
										2

gpu/config/gpu_control_list.cc
									
				@ -17,7 +17,7 @@

				#include "base/sys_info.h"

				#include "gpu/config/gpu_info.h"

				#include "gpu/config/gpu_util.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				namespace gpu {

				namespace {

									
										2

gpu/config/gpu_info_collector_win.cc
									
				@ -5,7 +5,7 @@

				#include "gpu/config/gpu_info_collector.h"

				// This has to be included before windows.h.

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				#include <windows.h>

				#include <cfgmgr32.h>

									
										2

third_party/leveldatabase/env_chromium.cc
									
										vendored
									
				@ -19,7 +19,7 @@

				#include "base/threading/thread_restrictions.h"

				#include "base/trace_event/trace_event.h"

				#include "third_party/leveldatabase/chromium_logger.h"

				#include "third_party/re2/re2/re2.h"

				#include "third_party/re2/src/re2/re2.h"

				using base::FilePath;

				using leveldb::FileLock;

5

third_party/re2/.gitignore vendored

 @ -1,5 +0,0 @@
 *.pyc
 *.orig
 core
 obj/
 benchlog.*

13

third_party/re2/AUTHORS vendored

 @ -1,13 +0,0 @@
 # This is the official list of RE2 authors for copyright purposes.
 # This file is distinct from the CONTRIBUTORS files.
 # See the latter for an explanation.
 # Names should be added to this file as
 #	Name or Organization <email address>
 # The email address is not required for organizations.
 # Please keep the list sorted.
 Google Inc.
 Samsung Electronics
 Stefano Rivera <stefano.rivera@gmail.com>

									
										121

third_party/re2/BUILD
									
										vendored
									
				@ -1,121 +0,0 @@

				# Copyright 2009 The RE2 Authors.  All Rights Reserved.

				# Use of this source code is governed by a BSD-style

				# license that can be found in the LICENSE file.

				# Bazel (http://bazel.io/) BUILD file for RE2.

				licenses(["notice"])

				cc_library(

				    name = "re2",

				    srcs = [

				        "re2/bitstate.cc",

				        "re2/compile.cc",

				        "re2/dfa.cc",

				        "re2/filtered_re2.cc",

				        "re2/mimics_pcre.cc",

				        "re2/nfa.cc",

				        "re2/onepass.cc",

				        "re2/parse.cc",

				        "re2/perl_groups.cc",

				        "re2/prefilter.cc",

				        "re2/prefilter.h",

				        "re2/prefilter_tree.cc",

				        "re2/prefilter_tree.h",

				        "re2/prog.cc",

				        "re2/prog.h",

				        "re2/re2.cc",

				        "re2/regexp.cc",

				        "re2/regexp.h",

				        "re2/set.cc",

				        "re2/simplify.cc",

				        "re2/stringpiece.cc",

				        "re2/tostring.cc",

				        "re2/unicode_casefold.cc",

				        "re2/unicode_casefold.h",

				        "re2/unicode_groups.cc",

				        "re2/unicode_groups.h",

				        "re2/walker-inl.h",

				        "util/atomicops.h",

				        "util/flags.h",

				        "util/hash.cc",

				        "util/logging.cc",

				        "util/logging.h",

				        "util/mutex.h",

				        "util/rune.cc",

				        "util/sparse_array.h",

				        "util/sparse_set.h",

				        "util/stringprintf.cc",

				        "util/strutil.cc",

				        "util/utf.h",

				        "util/util.h",

				        "util/valgrind.cc",

				        "util/valgrind.h",

				    ],

				    hdrs = [

				        "re2/filtered_re2.h",

				        "re2/re2.h",

				        "re2/set.h",

				        "re2/stringpiece.h",

				        "re2/variadic_function.h",

				    ],

				    includes = ["."],

				    linkopts = ["-pthread"],

				    visibility = ["//visibility:public"],

				)

				cc_library(

				    name = "test",

				    testonly = 1,

				    srcs = [

				        "re2/testing/backtrack.cc",

				        "re2/testing/dump.cc",

				        "re2/testing/exhaustive_tester.cc",

				        "re2/testing/null_walker.cc",

				        "re2/testing/regexp_generator.cc",

				        "re2/testing/string_generator.cc",

				        "re2/testing/tester.cc",

				        "util/pcre.cc",

				        "util/random.cc",

				        "util/test.cc",

				        "util/thread.cc",

				    ],

				    hdrs = [

				        "re2/testing/exhaustive_tester.h",

				        "re2/testing/regexp_generator.h",

				        "re2/testing/string_generator.h",

				        "re2/testing/tester.h",

				        "util/pcre.h",

				        "util/random.h",

				        "util/test.h",

				        "util/thread.h",

				    ],

				    includes = ["."],

				    deps = [":re2"],

				)

				load("re2_test", "re2_test")

				re2_test("charclass_test")

				re2_test("compile_test")

				re2_test("filtered_re2_test")

				re2_test("mimics_pcre_test")

				re2_test("parse_test")

				re2_test("possible_match_test")

				re2_test("re2_test")

				re2_test("re2_arg_test")

				re2_test("regexp_test")

				re2_test("required_prefix_test")

				re2_test("search_test")

				re2_test("set_test")

				re2_test("simplify_test")

				re2_test("string_generator_test")

				re2_test("dfa_test")

				re2_test("exhaustive1_test")

				re2_test("exhaustive2_test")

				re2_test("exhaustive3_test")

				re2_test("exhaustive_test")

				re2_test("random_test")

				# TODO: Add support for regexp_benchmark.

96

third_party/re2/BUILD.gn vendored

 @ -3,58 +3,58 @@
 # found in the LICENSE file.
 config("re2_config") {
   include_dirs = [ "." ]
   include_dirs = [ "src" ]
 }
 static_library("re2") {
   sources = [
     "re2/bitstate.cc",
     "re2/compile.cc",
     "re2/dfa.cc",
     "re2/filtered_re2.cc",
     "re2/filtered_re2.h",
     "re2/mimics_pcre.cc",
     "re2/nfa.cc",
     "re2/onepass.cc",
     "re2/parse.cc",
     "re2/perl_groups.cc",
     "re2/prefilter.cc",
     "re2/prefilter.h",
     "re2/prefilter_tree.cc",
     "re2/prefilter_tree.h",
     "re2/prog.cc",
     "re2/prog.h",
     "re2/re2.cc",
     "re2/re2.h",
     "re2/regexp.cc",
     "re2/regexp.h",
     "re2/set.cc",
     "re2/set.h",
     "re2/simplify.cc",
     "re2/stringpiece.cc",
     "re2/stringpiece.h",
     "re2/tostring.cc",
     "re2/unicode_casefold.cc",
     "re2/unicode_casefold.h",
     "re2/unicode_groups.cc",
     "re2/unicode_groups.h",
     "re2/variadic_function.h",
     "re2/walker-inl.h",
     "util/atomicops.h",
     "util/flags.h",
     "util/hash.cc",
     "util/logging.cc",
     "util/logging.h",
     "util/mutex.h",
     "util/rune.cc",
     "util/sparse_array.h",
     "util/sparse_set.h",
     "util/stringprintf.cc",
     "util/strutil.cc",
     "util/utf.h",
     "util/util.h",
     "util/valgrind.cc",
     "util/valgrind.h",
     "src/re2/bitstate.cc",
     "src/re2/compile.cc",
     "src/re2/dfa.cc",
     "src/re2/filtered_re2.cc",
     "src/re2/filtered_re2.h",
     "src/re2/mimics_pcre.cc",
     "src/re2/nfa.cc",
     "src/re2/onepass.cc",
     "src/re2/parse.cc",
     "src/re2/perl_groups.cc",
     "src/re2/prefilter.cc",
     "src/re2/prefilter.h",
     "src/re2/prefilter_tree.cc",
     "src/re2/prefilter_tree.h",
     "src/re2/prog.cc",
     "src/re2/prog.h",
     "src/re2/re2.cc",
     "src/re2/re2.h",
     "src/re2/regexp.cc",
     "src/re2/regexp.h",
     "src/re2/set.cc",
     "src/re2/set.h",
     "src/re2/simplify.cc",
     "src/re2/stringpiece.cc",
     "src/re2/stringpiece.h",
     "src/re2/tostring.cc",
     "src/re2/unicode_casefold.cc",
     "src/re2/unicode_casefold.h",
     "src/re2/unicode_groups.cc",
     "src/re2/unicode_groups.h",
     "src/re2/variadic_function.h",
     "src/re2/walker-inl.h",
     "src/util/atomicops.h",
     "src/util/flags.h",
     "src/util/hash.cc",
     "src/util/logging.cc",
     "src/util/logging.h",
     "src/util/mutex.h",
     "src/util/rune.cc",
     "src/util/sparse_array.h",
     "src/util/sparse_set.h",
     "src/util/stringprintf.cc",
     "src/util/strutil.cc",
     "src/util/utf.h",
     "src/util/util.h",
     "src/util/valgrind.cc",
     "src/util/valgrind.h",
   ]
   configs -= [ "//build/config/compiler:chromium_code" ]

									
										112

third_party/re2/CMakeLists.txt
									
										vendored
									
				@ -1,112 +0,0 @@

				# Copyright 2015 The RE2 Authors.  All Rights Reserved.

				# Use of this source code is governed by a BSD-style

				# license that can be found in the LICENSE file.

				# Old enough to support Ubuntu Precise.

				cmake_minimum_required(VERSION 2.8.7)

				project(RE2 CXX)

				option(BUILD_SHARED_LIBS "build shared libraries" OFF)

				option(USEPCRE "use PCRE in tests and benchmarks" OFF)

				set(EXTRA_TARGET_LINK_LIBRARIES)

				if(WIN32)

				  add_definitions(-DUNICODE -D_UNICODE -DSTRICT -DNOMINMAX)

				  set(THREADING threadwin)

				else()

				  set(THREADING thread)

				  list(APPEND EXTRA_TARGET_LINK_LIBRARIES -pthread)

				endif()

				if(USEPCRE)

				  add_definitions(-DUSEPCRE)

				  list(APPEND EXTRA_TARGET_LINK_LIBRARIES pcre)

				endif()

				include_directories(${CMAKE_SOURCE_DIR})

				set(RE2_LIBRARY_SOURCES

				    re2/bitstate.cc

				    re2/compile.cc

				    re2/dfa.cc

				    re2/filtered_re2.cc

				    re2/mimics_pcre.cc

				    re2/nfa.cc

				    re2/onepass.cc

				    re2/parse.cc

				    re2/perl_groups.cc

				    re2/prefilter.cc

				    re2/prefilter_tree.cc

				    re2/prog.cc

				    re2/re2.cc

				    re2/regexp.cc

				    re2/set.cc

				    re2/simplify.cc

				    re2/stringpiece.cc

				    re2/tostring.cc

				    re2/unicode_casefold.cc

				    re2/unicode_groups.cc

				    util/hash.cc

				    util/logging.cc

				    util/rune.cc

				    util/stringprintf.cc

				    util/strutil.cc

				    util/valgrind.cc

				    )

				add_library(re2 ${RE2_LIBRARY_SOURCES})

				set(TEST_LIBRARY_SOURCES

				    re2/testing/backtrack.cc

				    re2/testing/dump.cc

				    re2/testing/exhaustive_tester.cc

				    re2/testing/null_walker.cc

				    re2/testing/regexp_generator.cc

				    re2/testing/string_generator.cc

				    re2/testing/tester.cc

				    util/pcre.cc

				    util/random.cc

				    util/${THREADING}.cc

				    )

				add_library(test STATIC ${TEST_LIBRARY_SOURCES} util/test.cc)

				add_library(benchmark STATIC ${TEST_LIBRARY_SOURCES} util/benchmark.cc)

				set(TEST_TARGETS

				    charclass_test

				    compile_test

				    filtered_re2_test

				    mimics_pcre_test

				    parse_test

				    possible_match_test

				    re2_test

				    re2_arg_test

				    regexp_test

				    required_prefix_test

				    search_test

				    set_test

				    simplify_test

				    string_generator_test

				    dfa_test

				    exhaustive1_test

				    exhaustive2_test

				    exhaustive3_test

				    exhaustive_test

				    random_test

				    )

				set(BENCHMARK_TARGETS

				    regexp_benchmark

				    )

				foreach(target ${TEST_TARGETS})

				  add_executable(${target} re2/testing/${target}.cc)

				  target_link_libraries(${target} test re2 ${EXTRA_TARGET_LINK_LIBRARIES})

				endforeach(target)

				foreach(target ${BENCHMARK_TARGETS})

				  add_executable(${target} re2/testing/${target}.cc)

				  target_link_libraries(${target} benchmark re2 ${EXTRA_TARGET_LINK_LIBRARIES})

				endforeach(target)

									
										2

third_party/re2/CONTRIBUTING.md
									
										vendored
									
				@ -1,2 +0,0 @@

				RE2 uses Gerrit instead of GitHub pull requests.

				See the [Contributing](https://github.com/google/re2/wiki/Contribute) wiki page.

41

third_party/re2/CONTRIBUTORS vendored

 @ -1,41 +0,0 @@
 # This is the official list of people who can contribute
 # (and typically have contributed) code to the RE2 repository.
 # The AUTHORS file lists the copyright holders; this file
 # lists people.  For example, Google employees are listed here
 # but not in AUTHORS, because Google holds the copyright.
 #
 # The submission process automatically checks to make sure
 # that people submitting code are listed in this file (by email address).
 #
 # Names should be added to this file only after verifying that
 # the individual or the individual's organization has agreed to
 # the appropriate Contributor License Agreement, found here:
 #
 #     http://code.google.com/legal/individual-cla-v1.0.html
 #     http://code.google.com/legal/corporate-cla-v1.0.html
 #
 # The agreement for individuals can be filled out on the web.
 #
 # When adding J Random Contributor's name to this file,
 # either J's name or J's organization's name should be
 # added to the AUTHORS file, depending on whether the
 # individual or corporate CLA was used.
 # Names should be added to this file like so:
 #     Name <email address>
 # Please keep the list sorted.
 Dominic Battré <battre@chromium.org>
 Doug Kwan <dougkwan@google.com>
 Dmitriy Vyukov <dvyukov@google.com>
 John Millikin <jmillikin@gmail.com>
 Mike Nazarewicz <mpn@google.com>
 Nico Weber <thakis@chromium.org>
 Pawel Hajdan <phajdan.jr@gmail.com>
 Rob Pike <r@google.com>
 Russ Cox <rsc@swtch.com>
 Sanjay Ghemawat <sanjay@google.com>
 Stefano Rivera <stefano.rivera@gmail.com>
 Srinivasan Venkatachary <vsri@google.com>
 Viatcheslav Ostapenko <sl.ostapenko@samsung.com>

									
										310

third_party/re2/Makefile
									
										vendored
									
				@ -1,310 +0,0 @@

				# Copyright 2009 The RE2 Authors.  All Rights Reserved.

				# Use of this source code is governed by a BSD-style

				# license that can be found in the LICENSE file.

				# to build against PCRE for testing or benchmarking,

				# uncomment the next two lines

				# CCPCRE=-I/usr/local/include -DUSEPCRE

				# LDPCRE=-L/usr/local/lib -lpcre

				CXX?=g++

				CXXFLAGS?=-O3 -g  # can override

				RE2_CXXFLAGS?=-Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCPCRE)  # required

				LDFLAGS?=-pthread

				AR?=ar

				ARFLAGS?=rsc

				NM?=nm

				NMFLAGS?=-p

				# Variables mandated by GNU, the arbiter of all good taste on the internet.

				# http://www.gnu.org/prep/standards/standards.html

				prefix=/usr/local

				exec_prefix=$(prefix)

				bindir=$(exec_prefix)/bin

				includedir=$(prefix)/include

				libdir=$(exec_prefix)/lib

				INSTALL=install

				INSTALL_PROGRAM=$(INSTALL)

				INSTALL_DATA=$(INSTALL) -m 644

				# ABI version

				# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html

				SONAME=0

				# To rebuild the Tables generated by Perl and Python scripts (requires Internet

				# access for Unicode data), uncomment the following line:

				# REBUILD_TABLES=1

				ifeq ($(shell uname),Darwin)

				SOEXT=dylib

				SOEXTVER=$(SONAME).$(SOEXT)

				SOEXTVER00=$(SONAME).0.0.$(SOEXT)

				MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib $(LDFLAGS) -Wl,-install_name,@rpath/libre2.$(SOEXTVER) -exported_symbols_list libre2.symbols.darwin

				else ifeq ($(shell uname),SunOS)

				SOEXT=so

				SOEXTVER=$(SOEXT).$(SONAME)

				SOEXTVER00=$(SOEXT).$(SONAME).0.0

				MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),-M,libre2.symbols $(LDFLAGS)

				else

				SOEXT=so

				SOEXTVER=$(SOEXT).$(SONAME)

				SOEXTVER00=$(SOEXT).$(SONAME).0.0

				MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),--version-script,libre2.symbols $(LDFLAGS)

				endif

				all: obj/libre2.a obj/so/libre2.$(SOEXT)

				INSTALL_HFILES=\

					re2/filtered_re2.h\

					re2/re2.h\

					re2/set.h\

					re2/stringpiece.h\

					re2/variadic_function.h\

				HFILES=\

					util/atomicops.h\

					util/benchmark.h\

					util/flags.h\

					util/logging.h\

					util/mutex.h\

					util/pcre.h\

					util/random.h\

					util/sparse_array.h\

					util/sparse_set.h\

					util/test.h\

					util/thread.h\

					util/utf.h\

					util/util.h\

					util/valgrind.h\

					re2/filtered_re2.h\

					re2/prefilter.h\

					re2/prefilter_tree.h\

					re2/prog.h\

					re2/re2.h\

					re2/regexp.h\

					re2/set.h\

					re2/stringpiece.h\

					re2/testing/exhaustive_tester.h\

					re2/testing/regexp_generator.h\

					re2/testing/string_generator.h\

					re2/testing/tester.h\

					re2/unicode_casefold.h\

					re2/unicode_groups.h\

					re2/variadic_function.h\

					re2/walker-inl.h\

				OFILES=\

					obj/util/hash.o\

					obj/util/logging.o\

					obj/util/rune.o\

					obj/util/stringprintf.o\

					obj/util/strutil.o\

					obj/util/valgrind.o\

					obj/re2/bitstate.o\

					obj/re2/compile.o\

					obj/re2/dfa.o\

					obj/re2/filtered_re2.o\

					obj/re2/mimics_pcre.o\

					obj/re2/nfa.o\

					obj/re2/onepass.o\

					obj/re2/parse.o\

					obj/re2/perl_groups.o\

					obj/re2/prefilter.o\

					obj/re2/prefilter_tree.o\

					obj/re2/prog.o\

					obj/re2/re2.o\

					obj/re2/regexp.o\

					obj/re2/set.o\

					obj/re2/simplify.o\

					obj/re2/stringpiece.o\

					obj/re2/tostring.o\

					obj/re2/unicode_casefold.o\

					obj/re2/unicode_groups.o\

				TESTOFILES=\

					obj/util/pcre.o\

					obj/util/random.o\

					obj/util/thread.o\

					obj/re2/testing/backtrack.o\

					obj/re2/testing/dump.o\

					obj/re2/testing/exhaustive_tester.o\

					obj/re2/testing/null_walker.o\

					obj/re2/testing/regexp_generator.o\

					obj/re2/testing/string_generator.o\

					obj/re2/testing/tester.o\

				TESTS=\

					obj/test/charclass_test\

					obj/test/compile_test\

					obj/test/filtered_re2_test\

					obj/test/mimics_pcre_test\

					obj/test/parse_test\

					obj/test/possible_match_test\

					obj/test/re2_test\

					obj/test/re2_arg_test\

					obj/test/regexp_test\

					obj/test/required_prefix_test\

					obj/test/search_test\

					obj/test/set_test\

					obj/test/simplify_test\

					obj/test/string_generator_test\

				# Extra test binaries that the *-bigtest targets run in addition to
				# TESTS; the log target also builds the exhaustive ones from this list.

				BIGTESTS=\

					obj/test/dfa_test\

					obj/test/exhaustive1_test\

					obj/test/exhaustive2_test\

					obj/test/exhaustive3_test\

					obj/test/exhaustive_test\

					obj/test/random_test\

				# Shared-library (obj/so/) and debug (obj/dbg/) counterparts of
				# OFILES, TESTOFILES, TESTS and BIGTESTS, obtained by rewriting the
				# leading obj/ prefix of every entry.

				SOFILES=$(patsubst obj/%,obj/so/%,$(OFILES))

				STESTOFILES=$(patsubst obj/%,obj/so/%,$(TESTOFILES))

				STESTS=$(patsubst obj/%,obj/so/%,$(TESTS))

				SBIGTESTS=$(patsubst obj/%,obj/so/%,$(BIGTESTS))

				DOFILES=$(patsubst obj/%,obj/dbg/%,$(OFILES))

				DTESTOFILES=$(patsubst obj/%,obj/dbg/%,$(TESTOFILES))

				DTESTS=$(patsubst obj/%,obj/dbg/%,$(TESTS))

				DBIGTESTS=$(patsubst obj/%,obj/dbg/%,$(BIGTESTS))

				# Compile rules. All three build the same %.cc source and depend on
				# every header in HFILES, so touching any header recompiles everything.
				# They differ only in output directory and flags:
				#   obj/%.o      adds -DNDEBUG
				#   obj/dbg/%.o  omits -DNDEBUG
				#   obj/so/%.o   adds -fPIC and -DNDEBUG
				# Each recipe first creates the output directory for its target.

				obj/%.o: %.cc $(HFILES)

					@mkdir -p $$(dirname $@)

					$(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $(RE2_CXXFLAGS) -DNDEBUG $*.cc

				obj/dbg/%.o: %.cc $(HFILES)

					@mkdir -p $$(dirname $@)

					$(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $(RE2_CXXFLAGS) $*.cc

				obj/so/%.o: %.cc $(HFILES)

					@mkdir -p $$(dirname $@)

					$(CXX) -c -o $@ -fPIC $(CPPFLAGS) $(CXXFLAGS) $(RE2_CXXFLAGS) -DNDEBUG $*.cc

				# Library rules. The static and debug archives are created with
				# $(AR) from OFILES and DOFILES respectively. The shared library is
				# linked from SOFILES under the name libre2.$(SOEXTVER), and the rule's
				# own target (libre2.$(SOEXT)) is then created as a symlink to it.

				obj/libre2.a: $(OFILES)

					@mkdir -p obj

					$(AR) $(ARFLAGS) obj/libre2.a $(OFILES)

				obj/dbg/libre2.a: $(DOFILES)

					@mkdir -p obj/dbg

					$(AR) $(ARFLAGS) obj/dbg/libre2.a $(DOFILES)

				obj/so/libre2.$(SOEXT): $(SOFILES)

					@mkdir -p obj/so

					$(MAKE_SHARED_LIBRARY) -o obj/so/libre2.$(SOEXTVER) $(SOFILES)

					ln -sf libre2.$(SOEXTVER) $@

				# Link rules for test binaries. Each binary links its own
				# re2/testing/<name>.o with the shared test support objects,
				# util/test.o and the matching static library.
				# The obj/so variant passes -Lobj/so -lre2 and then also links
				# obj/libre2.a.
				# regexp_benchmark has an explicit rule because it links
				# util/benchmark.o in place of util/test.o.

				obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o

					@mkdir -p obj/test

					$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(LDFLAGS) $(LDPCRE)

				obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/util/test.o

					@mkdir -p obj/dbg/test

					$(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/util/test.o obj/dbg/libre2.a $(LDFLAGS) $(LDPCRE)

				obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/so/re2/testing/%.o $(STESTOFILES) obj/so/util/test.o

					@mkdir -p obj/so/test

					$(CXX) -o $@ obj/so/re2/testing/$*.o $(STESTOFILES) obj/so/util/test.o -Lobj/so -lre2 obj/libre2.a $(LDFLAGS) $(LDPCRE)

				obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o

					@mkdir -p obj/test

					$(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o obj/libre2.a $(LDFLAGS) $(LDPCRE)

				# Table regeneration, active only when REBUILD_TABLES is defined.
				# Each generated source is produced by running its generator script
				# (perl or python) and redirecting the script's stdout into the target.
				# The generated sources are also listed under .PRECIOUS.

				ifdef REBUILD_TABLES

				re2/perl_groups.cc: re2/make_perl_groups.pl

					perl $< > $@

				re2/unicode_%.cc: re2/make_unicode_%.py

					python $< > $@

				.PRECIOUS: re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc

				endif

				# clean removes the whole obj tree and any re2/*.pyc files.
				# distclean runs clean and then also deletes the three table sources
				# that the REBUILD_TABLES rules know how to regenerate.

				distclean: clean

					rm -f re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc

				clean:

					rm -rf obj

					rm -f re2/*.pyc

				# Test entry points.
				#   testofiles       builds only the shared test support objects.
				#   test             builds all three flavours and runs all three suites.
				#   debug-test, static-test, shared-test
				#                    build one flavour and pass it to ./runtests.
				#   *-bigtest        same, with the BIGTESTS binaries added.
				#   benchmark        only builds obj/test/regexp_benchmark.
				# The shared variants prepend obj/so to LD_LIBRARY_PATH for the run.
				# NOTE(review): only LD_LIBRARY_PATH is set here; confirm whether
				# Darwin hosts need a different variable for the shared runs.

				testofiles: $(TESTOFILES)

				test: $(DTESTS) $(TESTS) $(STESTS) debug-test static-test shared-test

				debug-test: $(DTESTS)

					@echo

					@echo Running debug binary tests.

					@echo

					@./runtests $(DTESTS)

				static-test: $(TESTS)

					@echo

					@echo Running static binary tests.

					@echo

					@./runtests $(TESTS)

				shared-test: $(STESTS)

					@echo

					@echo Running dynamic binary tests.

					@echo

					@LD_LIBRARY_PATH=obj/so:$(LD_LIBRARY_PATH) ./runtests $(STESTS)

				debug-bigtest: $(DTESTS) $(DBIGTESTS)

					@./runtests $(DTESTS) $(DBIGTESTS)

				static-bigtest: $(TESTS) $(BIGTESTS)

					@./runtests $(TESTS) $(BIGTESTS)

				shared-bigtest: $(STESTS) $(SBIGTESTS)

					@LD_LIBRARY_PATH=obj/so:$(LD_LIBRARY_PATH) ./runtests $(STESTS) $(SBIGTESTS)

				benchmark: obj/test/regexp_benchmark

				# Installs under $(DESTDIR):
				#   - the headers in INSTALL_HFILES into $(includedir)/re2
				#   - the static archive into $(libdir)
				#   - the shared library as libre2.$(SOEXTVER00), with symlinks named
				#     libre2.$(SOEXTVER) and libre2.$(SOEXT) pointing at it
				#   - re2.pc into $(libdir)/pkgconfig, with @prefix@ replaced by
				#     ${prefix}

				install: obj/libre2.a obj/so/libre2.$(SOEXT)

					mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig

					$(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2

					$(INSTALL) obj/libre2.a $(DESTDIR)$(libdir)/libre2.a

					$(INSTALL) obj/so/libre2.$(SOEXT) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER00)

					ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER)

					ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXT)

					sed -e "s#@prefix@#${prefix}#" re2.pc >$(DESTDIR)$(libdir)/pkgconfig/re2.pc

				# Smoke-tests an installed copy: testinstall.cc is copied into obj,
				# compiled against the installed headers and library, and run.
				# When `uname` is not Darwin, a -static build is compiled and run first.
				# The dynamically linked build then runs on every host, with
				# LD_LIBRARY_PATH pointing at the installed libdir.

				testinstall:

					@mkdir -p obj

					cp testinstall.cc obj

				ifneq ($(shell uname),Darwin)

					(cd obj && $(CXX) -I$(DESTDIR)$(includedir) -L$(DESTDIR)$(libdir) testinstall.cc -lre2 -pthread -static -o testinstall)

					obj/testinstall

				endif

					(cd obj && $(CXX) -I$(DESTDIR)$(includedir) -L$(DESTDIR)$(libdir) testinstall.cc -lre2 -pthread -o testinstall)

					LD_LIBRARY_PATH=$(DESTDIR)$(libdir) obj/testinstall

				# Runs regexp_benchmark with the argument 'PCRE|RE2' and appends the
				# output to benchlog.<hostname up to the first dot>.
				# The run is preceded by a ==BENCHMARK== header line (host and date)
				# and by '# '-prefixed lines with uname, compiler version, short git
				# revision and `file` output for the benchmark binary.

				benchlog: obj/test/regexp_benchmark

					(echo '==BENCHMARK==' `hostname` `date`; \

					  (uname -a; $(CXX) --version; git rev-parse --short HEAD; file obj/test/regexp_benchmark) | sed 's/^/# /'; \

					  echo; \

					  ./obj/test/regexp_benchmark 'PCRE|RE2') | tee -a benchlog.$$(hostname | sed 's/\..*//')

				# Keep gmake from deleting intermediate files it creates.

				# This makes repeated builds faster and preserves debug info on OS X.

				# The shared library is listed under the name its rule actually builds,

				# obj/so/libre2.$(SOEXT); no rule produces an obj/so/libre2.a.

				.PRECIOUS: obj/%.o obj/dbg/%.o obj/so/%.o obj/libre2.a \

					obj/dbg/libre2.a obj/so/libre2.$(SOEXT) \

					obj/test/% obj/so/test/% obj/dbg/test/%

				# Regenerates the reference logs re2-exhaustive.txt and re2-search.txt.
				# After a clean, the exhaustive tests from BIGTESTS and search_test
				# are rebuilt with -DLOGGING=1 appended to CXXFLAGS. Each log starts
				# with two '#' header lines (description and date), followed by the
				# test output with every line that is exactly PASS filtered out.

				log:

					$(MAKE) clean

					$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" \

						$(filter obj/test/exhaustive%_test,$(BIGTESTS))

					echo '#' RE2 exhaustive tests built by make log >re2-exhaustive.txt

					echo '#' $$(date) >>re2-exhaustive.txt

					obj/test/exhaustive_test |grep -v '^PASS$$' >>re2-exhaustive.txt

					obj/test/exhaustive1_test |grep -v '^PASS$$' >>re2-exhaustive.txt

					obj/test/exhaustive2_test |grep -v '^PASS$$' >>re2-exhaustive.txt

					obj/test/exhaustive3_test |grep -v '^PASS$$' >>re2-exhaustive.txt

					$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" obj/test/search_test

					echo '#' RE2 basic search tests built by make $@ >re2-search.txt

					echo '#' $$(date) >>re2-search.txt

					obj/test/search_test |grep -v '^PASS$$' >>re2-search.txt

				# Scratch target: builds ./x from x.cc against the static library.
				# Uses $(CXX) like every other compile/link rule in this file, so a
				# compiler override on the command line applies here as well.

				x: x.cc obj/libre2.a

					$(CXX) -I. -o x x.cc obj/libre2.a

32

third_party/re2/README vendored

 @ -1,32 +0,0 @@
 This is the source code repository for RE2, a regular expression library.
 For documentation about how to install and use RE2,
 visit https://github.com/google/re2/.
 The short version is:
 make
 make test
 make install
 make testinstall
 More information can be found on the wiki:
 https://github.com/google/re2/wiki
 Issue tracker:
 https://github.com/google/re2/issues
 Mailing list:
 https://groups.google.com/group/re2-dev
 Unless otherwise noted, the RE2 source files are distributed
 under the BSD-style license found in the LICENSE file.
 RE2's native language is C++.
 An Erlang wrapper is at https://github.com/tuncer/re2/.
 An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
 A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM.
 An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM.
 A Perl wrapper is at https://github.com/dgl/re-engine-RE2/ and on CPAN.
 A Python wrapper is at https://github.com/facebook/pyre2/.
 A Ruby wrapper is at https://github.com/axic/rre2/.

									
										5

third_party/re2/WORKSPACE
									
										vendored
									
				@ -1,5 +0,0 @@

				# Copyright 2009 The RE2 Authors.  All Rights Reserved.

				# Use of this source code is governed by a BSD-style

				# license that can be found in the LICENSE file.

				# Bazel (http://bazel.io/) WORKSPACE file for RE2.

2211

third_party/re2/benchlog/benchlog.c2 vendored

File diff suppressed because it is too large Load Diff

582

third_party/re2/benchlog/benchlog.mini vendored

 @ -1,582 +0,0 @@
 hw.ncpu: 2
 hw.byteorder: 1234
 hw.memsize: 4294967296
 hw.activecpu: 2
 hw.physicalcpu: 2
 hw.physicalcpu_max: 2
 hw.logicalcpu: 2
 hw.logicalcpu_max: 2
 hw.cputype: 7
 hw.cpusubtype: 4
 hw.cpu64bit_capable: 1
 hw.cpufamily: 1114597871
 hw.cacheconfig: 2 1 2 0 0 0 0 0 0 0
 hw.cachesize: 3221225472 32768 2097152 0 0 0 0 0 0 0
 hw.pagesize: 4096
 hw.busfrequency: 664000000
 hw.busfrequency_min: 664000000
 hw.busfrequency_max: 664000000
 hw.cpufrequency: 1830000000
 hw.cpufrequency_min: 1830000000
 hw.cpufrequency_max: 1830000000
 hw.cachelinesize: 64
 hw.l1icachesize: 32768
 hw.l1dcachesize: 32768
 hw.l2cachesize: 2097152
 hw.tbfrequency: 1000000000
 hw.packages: 1
 hw.optional.floatingpoint: 1
 hw.optional.mmx: 1
 hw.optional.sse: 1
 hw.optional.sse2: 1
 hw.optional.sse3: 1
 hw.optional.supplementalsse3: 1
 hw.optional.sse4_1: 0
 hw.optional.sse4_2: 0
 hw.optional.x86_64: 1
 hw.machine = i386
 hw.model = Macmini2,1
 hw.ncpu = 2
 hw.byteorder = 1234
 hw.physmem = 2147483648
 hw.usermem = 1849147392
 hw.pagesize = 4096
 hw.epoch = 0
 hw.vectorunit = 1
 hw.busfrequency = 664000000
 hw.cpufrequency = 1830000000
 hw.cachelinesize = 64
 hw.l1icachesize = 32768
 hw.l1dcachesize = 32768
 hw.l2settings = 1
 hw.l2cachesize = 2097152
 hw.tbfrequency = 1000000000
 hw.memsize = 4294967296
 hw.availcpu = 2
 machdep.cpu.max_basic: 10
 machdep.cpu.max_ext: 2147483656
 machdep.cpu.vendor: GenuineIntel
 machdep.cpu.brand_string: Intel(R) Core(TM)2 CPU         T5600  @ 1.83GHz
 machdep.cpu.family: 6
 machdep.cpu.model: 15
 machdep.cpu.extmodel: 0
 machdep.cpu.extfamily: 0
 machdep.cpu.stepping: 2
 machdep.cpu.feature_bits: 3219913727 58301
 machdep.cpu.extfeature_bits: 537921536 1
 machdep.cpu.signature: 1778
 machdep.cpu.brand: 0
 machdep.cpu.features:  FPU VME DE PSE TSC MSR PAE MCE CX8 APIC SEP MTRR PGE MCA CMOV PAT PSE36 CLFSH DS ACPI MMX FXSR SSE SSE2 SS HTT TM SSE3 MON DSCPL VMX EST TM2 SSSE3 CX16 TPR PDCM
 machdep.cpu.extfeatures:  SYSCALL XD EM64T
 machdep.cpu.logical_per_package: 2
 machdep.cpu.cores_per_package: 2
 machdep.cpu.microcode_version: 87
 machdep.cpu.mwait.linesize_min: 64
 machdep.cpu.mwait.linesize_max: 64
 machdep.cpu.mwait.extensions: 3
 machdep.cpu.mwait.sub_Cstates: 139808
 machdep.cpu.thermal.sensor: 1
 machdep.cpu.thermal.dynamic_acceleration: 0
 machdep.cpu.thermal.thresholds: 2
 machdep.cpu.thermal.ACNT_MCNT: 1
 machdep.cpu.arch_perf.version: 2
 machdep.cpu.arch_perf.number: 2
 machdep.cpu.arch_perf.width: 40
 machdep.cpu.arch_perf.events_number: 7
 machdep.cpu.arch_perf.events: 0
 machdep.cpu.arch_perf.fixed_number: 0
 machdep.cpu.arch_perf.fixed_width: 0
 machdep.cpu.cache.linesize: 64
 machdep.cpu.cache.L2_associativity: 6
 machdep.cpu.cache.size: 2048
 machdep.cpu.tlb.inst.small: 128
 machdep.cpu.tlb.inst.large: 8
 machdep.cpu.tlb.data.small: 16
 machdep.cpu.tlb.data.small_level1: 256
 machdep.cpu.tlb.data.large: 16
 machdep.cpu.tlb.data.large_level1: 32
 machdep.cpu.address_bits.physical: 36
 machdep.cpu.address_bits.virtual: 48
 machdep.cpu.core_count: 2
 machdep.cpu.thread_count: 2
 ==BENCHMARK== mini.local Fri Feb 26 16:57:10 PST 2010
 # Darwin mini.local 10.2.0 Darwin Kernel Version 10.2.0: Tue Nov  3 10:37:10 PST 2009; root:xnu-1486.2.11~1/RELEASE_I386 i386
 # i686-apple-darwin10-g++-4.2.1 (GCC) 4.2.1 (Apple Inc. build 5646) (dot 1)
 # Copyright (C) 2007 Free Software Foundation, Inc.
 # This is free software; see the source for copying conditions.  There is NO
 # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # a94585d91e66+ tip
 # obj/test/regexp_benchmark: Mach-O 64-bit executable x86_64
 Search_Easy0_CachedPCRE/8	10000000	       176 ns/op	  45.40 MB/s
 Search_Easy0_CachedPCRE/16	10000000	       209 ns/op	  76.41 MB/s
 Search_Easy0_CachedPCRE/32	10000000	       269 ns/op	 118.53 MB/s
 Search_Easy0_CachedPCRE/64	 5000000	       398 ns/op	 160.77 MB/s
 Search_Easy0_CachedPCRE/128	 5000000	       536 ns/op	 238.69 MB/s
 Search_Easy0_CachedPCRE/256	 2000000	       897 ns/op	 285.22 MB/s
 Search_Easy0_CachedPCRE/512	 1000000	      2161 ns/op	 236.92 MB/s
 Search_Easy0_CachedPCRE/1K	  500000	      4769 ns/op	 214.70 MB/s
 Search_Easy0_CachedPCRE/2K	  200000	      8031 ns/op	 255.00 MB/s
 Search_Easy0_CachedPCRE/4K	  100000	     16208 ns/op	 252.71 MB/s
 Search_Easy0_CachedPCRE/8K	   50000	     32219 ns/op	 254.26 MB/s
 Search_Easy0_CachedPCRE/16K	   50000	     63347 ns/op	 258.64 MB/s
 Search_Easy0_CachedPCRE/32K	   10000	    125875 ns/op	 260.32 MB/s
 Search_Easy0_CachedPCRE/64K	   10000	    247829 ns/op	 264.44 MB/s
 Search_Easy0_CachedPCRE/128K	    5000	    498699 ns/op	 262.83 MB/s
 Search_Easy0_CachedPCRE/256K	    2000	    978021 ns/op	 268.04 MB/s
 Search_Easy0_CachedPCRE/512K	    1000	   1975059 ns/op	 265.45 MB/s
 Search_Easy0_CachedPCRE/1M	     500	   3994258 ns/op	 262.52 MB/s
 Search_Easy0_CachedPCRE/2M	     200	   7959640 ns/op	 263.47 MB/s
 Search_Easy0_CachedPCRE/4M	     100	  15950300 ns/op	 262.96 MB/s
 Search_Easy0_CachedPCRE/8M	      50	  32435540 ns/op	 258.62 MB/s
 Search_Easy0_CachedPCRE/16M	      50	  64686180 ns/op	 259.36 MB/s
 Search_Easy0_CachedRE2/8	 5000000	       535 ns/op	  14.95 MB/s
 Search_Easy0_CachedRE2/16	 5000000	       557 ns/op	  28.70 MB/s
 Search_Easy0_CachedRE2/32	 5000000	       595 ns/op	  53.75 MB/s
 Search_Easy0_CachedRE2/64	 5000000	       643 ns/op	  99.50 MB/s
 Search_Easy0_CachedRE2/128	 2000000	       759 ns/op	 168.64 MB/s
 Search_Easy0_CachedRE2/256	 2000000	       972 ns/op	 263.30 MB/s
 Search_Easy0_CachedRE2/512	 1000000	      1458 ns/op	 351.13 MB/s
 Search_Easy0_CachedRE2/1K	 1000000	      2544 ns/op	 402.51 MB/s
 Search_Easy0_CachedRE2/2K	  500000	      4551 ns/op	 449.99 MB/s
 Search_Easy0_CachedRE2/4K	  200000	      8677 ns/op	 472.01 MB/s
 Search_Easy0_CachedRE2/8K	  100000	     17188 ns/op	 476.59 MB/s
 Search_Easy0_CachedRE2/16K	   50000	     33869 ns/op	 483.73 MB/s
 Search_Easy0_CachedRE2/32K	   50000	     67787 ns/op	 483.39 MB/s
 Search_Easy0_CachedRE2/64K	   10000	    133362 ns/op	 491.41 MB/s
 Search_Easy0_CachedRE2/128K	   10000	    266469 ns/op	 491.88 MB/s
 Search_Easy0_CachedRE2/256K	    5000	    536980 ns/op	 488.18 MB/s
 Search_Easy0_CachedRE2/512K	    2000	   1050843 ns/op	 498.92 MB/s
 Search_Easy0_CachedRE2/1M	    1000	   2120649 ns/op	 494.46 MB/s
 Search_Easy0_CachedRE2/2M	     500	   4273918 ns/op	 490.69 MB/s
 Search_Easy0_CachedRE2/4M	     200	   8591285 ns/op	 488.20 MB/s
 Search_Easy0_CachedRE2/8M	     100	  17197390 ns/op	 487.78 MB/s
 Search_Easy0_CachedRE2/16M	      50	  34338780 ns/op	 488.58 MB/s
 Search_Easy1_CachedPCRE/8	10000000	       174 ns/op	  45.74 MB/s
 Search_Easy1_CachedPCRE/16	10000000	       206 ns/op	  77.31 MB/s
 Search_Easy1_CachedPCRE/32	10000000	       270 ns/op	 118.43 MB/s
 Search_Easy1_CachedPCRE/64	 5000000	       402 ns/op	 159.07 MB/s
 Search_Easy1_CachedPCRE/128	 5000000	       540 ns/op	 236.84 MB/s
 Search_Easy1_CachedPCRE/256	 2000000	       909 ns/op	 281.34 MB/s
 Search_Easy1_CachedPCRE/512	 1000000	      1852 ns/op	 276.34 MB/s
 Search_Easy1_CachedPCRE/1K	  500000	      4318 ns/op	 237.12 MB/s
 Search_Easy1_CachedPCRE/2K	  200000	      8346 ns/op	 245.37 MB/s
 Search_Easy1_CachedPCRE/4K	  100000	     16214 ns/op	 252.62 MB/s
 Search_Easy1_CachedPCRE/8K	   50000	     32438 ns/op	 252.54 MB/s
 Search_Easy1_CachedPCRE/16K	   50000	     62914 ns/op	 260.42 MB/s
 Search_Easy1_CachedPCRE/32K	   10000	    124792 ns/op	 262.58 MB/s
 Search_Easy1_CachedPCRE/64K	   10000	    250941 ns/op	 261.16 MB/s
 Search_Easy1_CachedPCRE/128K	    5000	    498405 ns/op	 262.98 MB/s
 Search_Easy1_CachedPCRE/256K	    2000	    997305 ns/op	 262.85 MB/s
 Search_Easy1_CachedPCRE/512K	    1000	   2023179 ns/op	 259.14 MB/s
 Search_Easy1_CachedPCRE/1M	     500	   4005202 ns/op	 261.80 MB/s
 Search_Easy1_CachedPCRE/2M	     200	   8116410 ns/op	 258.38 MB/s
 Search_Easy1_CachedPCRE/4M	     100	  16145970 ns/op	 259.77 MB/s
 Search_Easy1_CachedPCRE/8M	      50	  32471260 ns/op	 258.34 MB/s
 Search_Easy1_CachedPCRE/16M	      50	  64734020 ns/op	 259.17 MB/s
 Search_Easy1_CachedRE2/8	 5000000	       543 ns/op	  14.72 MB/s
 Search_Easy1_CachedRE2/16	 5000000	       570 ns/op	  28.07 MB/s
 Search_Easy1_CachedRE2/32	 5000000	       605 ns/op	  52.81 MB/s
 Search_Easy1_CachedRE2/64	 5000000	       643 ns/op	  99.39 MB/s
 Search_Easy1_CachedRE2/128	 2000000	       764 ns/op	 167.45 MB/s
 Search_Easy1_CachedRE2/256	 2000000	       970 ns/op	 263.85 MB/s
 Search_Easy1_CachedRE2/512	 1000000	      1455 ns/op	 351.75 MB/s
 Search_Easy1_CachedRE2/1K	 1000000	      2506 ns/op	 408.48 MB/s
 Search_Easy1_CachedRE2/2K	  500000	      4571 ns/op	 447.97 MB/s
 Search_Easy1_CachedRE2/4K	  200000	      8812 ns/op	 464.81 MB/s
 Search_Easy1_CachedRE2/8K	  100000	     17079 ns/op	 479.65 MB/s
 Search_Easy1_CachedRE2/16K	   50000	     33802 ns/op	 484.70 MB/s
 Search_Easy1_CachedRE2/32K	   50000	     67171 ns/op	 487.83 MB/s
 Search_Easy1_CachedRE2/64K	   10000	    131505 ns/op	 498.35 MB/s
 Search_Easy1_CachedRE2/128K	   10000	    263228 ns/op	 497.94 MB/s
 Search_Easy1_CachedRE2/256K	    5000	    528135 ns/op	 496.36 MB/s
 Search_Easy1_CachedRE2/512K	    2000	   1052768 ns/op	 498.01 MB/s
 Search_Easy1_CachedRE2/1M	    1000	   2112714 ns/op	 496.32 MB/s
 Search_Easy1_CachedRE2/2M	     500	   4289478 ns/op	 488.91 MB/s
 Search_Easy1_CachedRE2/4M	     200	   8519430 ns/op	 492.32 MB/s
 Search_Easy1_CachedRE2/8M	     100	  17002860 ns/op	 493.36 MB/s
 Search_Easy1_CachedRE2/16M	      50	  34341100 ns/op	 488.55 MB/s
 Search_Medium_CachedPCRE/8	10000000	       175 ns/op	  45.48 MB/s
 Search_Medium_CachedPCRE/16	10000000	       206 ns/op	  77.31 MB/s
 Search_Medium_CachedPCRE/32	10000000	       273 ns/op	 117.10 MB/s
 Search_Medium_CachedPCRE/64	 5000000	       427 ns/op	 149.60 MB/s
 Search_Medium_CachedPCRE/128	  200000	      9382 ns/op	  13.64 MB/s
 Search_Medium_CachedPCRE/256	  100000	     15339 ns/op	  16.69 MB/s
 Search_Medium_CachedPCRE/512	   50000	     35837 ns/op	  14.29 MB/s
 Search_Medium_CachedPCRE/1K	   50000	     71109 ns/op	  14.40 MB/s
 Search_Medium_CachedPCRE/2K	   10000	    111371 ns/op	  18.39 MB/s
 Search_Medium_CachedPCRE/4K	   10000	    264964 ns/op	  15.46 MB/s
 Search_Medium_CachedPCRE/8K	    5000	    554964 ns/op	  14.76 MB/s
 Search_Medium_CachedPCRE/16K	    2000	   1122116 ns/op	  14.60 MB/s
 Search_Medium_CachedPCRE/32K	    1000	   2305129 ns/op	  14.22 MB/s
 Search_Medium_CachedPCRE/64K	     500	   4401888 ns/op	  14.89 MB/s
 Search_Medium_CachedPCRE/128K	     200	   8591800 ns/op	  15.26 MB/s
 Search_Medium_CachedPCRE/256K	     100	  17534580 ns/op	  14.95 MB/s
 Search_Medium_CachedRE2/8	 5000000	       531 ns/op	  15.06 MB/s
 Search_Medium_CachedRE2/16	 5000000	       579 ns/op	  27.60 MB/s
 Search_Medium_CachedRE2/32	 5000000	       666 ns/op	  47.99 MB/s
 Search_Medium_CachedRE2/64	 2000000	       817 ns/op	  78.31 MB/s
 Search_Medium_CachedRE2/128	 1000000	      1174 ns/op	 108.94 MB/s
 Search_Medium_CachedRE2/256	 1000000	      1824 ns/op	 140.30 MB/s
 Search_Medium_CachedRE2/512	  500000	      3097 ns/op	 165.29 MB/s
 Search_Medium_CachedRE2/1K	  500000	      6101 ns/op	 167.84 MB/s
 Search_Medium_CachedRE2/2K	  200000	     12024 ns/op	 170.32 MB/s
 Search_Medium_CachedRE2/4K	  100000	     21483 ns/op	 190.66 MB/s
 Search_Medium_CachedRE2/8K	   50000	     41321 ns/op	 198.25 MB/s
 Search_Medium_CachedRE2/16K	   20000	     82227 ns/op	 199.25 MB/s
 Search_Medium_CachedRE2/32K	   10000	    166314 ns/op	 197.02 MB/s
 Search_Medium_CachedRE2/64K	    5000	    334190 ns/op	 196.10 MB/s
 Search_Medium_CachedRE2/128K	    5000	    672222 ns/op	 194.98 MB/s
 Search_Medium_CachedRE2/256K	    2000	   1335691 ns/op	 196.26 MB/s
 Search_Medium_CachedRE2/512K	    1000	   2650973 ns/op	 197.77 MB/s
 Search_Medium_CachedRE2/1M	     500	   5401168 ns/op	 194.14 MB/s
 Search_Medium_CachedRE2/2M	     100	  10724160 ns/op	 195.55 MB/s
 Search_Medium_CachedRE2/4M	     100	  21647840 ns/op	 193.75 MB/s
 Search_Medium_CachedRE2/8M	      50	  43369000 ns/op	 193.42 MB/s
 Search_Medium_CachedRE2/16M	      20	  85095750 ns/op	 197.16 MB/s
 Search_Hard_CachedPCRE/8	10000000	       178 ns/op	  44.77 MB/s
 Search_Hard_CachedPCRE/16	10000000	       211 ns/op	  75.54 MB/s
 Search_Hard_CachedPCRE/32	10000000	       274 ns/op	 116.75 MB/s
 Search_Hard_CachedPCRE/64	 5000000	       401 ns/op	 159.58 MB/s
 Search_Hard_CachedPCRE/128	    5000	    331833 ns/op	   0.39 MB/s
 Search_Hard_CachedPCRE/256	    2000	   1299658 ns/op	   0.20 MB/s
 Search_Hard_CachedPCRE/512	     500	   5361070 ns/op	   0.10 MB/s
 Search_Hard_CachedPCRE/1K	     100	  20744900 ns/op	   0.05 MB/s
 Search_Hard_CachedPCRE/2K	      20	  78382950 ns/op	   0.03 MB/s
 Search_Hard_CachedPCRE/4K	       5	 335826800 ns/op	   0.01 MB/s
 Search_Hard_CachedRE2/8	 5000000	       550 ns/op	  14.53 MB/s
 Search_Hard_CachedRE2/16	 5000000	       600 ns/op	  26.66 MB/s
 Search_Hard_CachedRE2/32	 5000000	       683 ns/op	  46.80 MB/s
 Search_Hard_CachedRE2/64	 2000000	       834 ns/op	  76.69 MB/s
 Search_Hard_CachedRE2/128	 1000000	      1168 ns/op	 109.57 MB/s
 Search_Hard_CachedRE2/256	 1000000	      1833 ns/op	 139.65 MB/s
 Search_Hard_CachedRE2/512	  500000	      3069 ns/op	 166.81 MB/s
 Search_Hard_CachedRE2/1K	  500000	      5780 ns/op	 177.14 MB/s
 Search_Hard_CachedRE2/2K	  200000	     11060 ns/op	 185.17 MB/s
 Search_Hard_CachedRE2/4K	  100000	     21511 ns/op	 190.41 MB/s
 Search_Hard_CachedRE2/8K	   50000	     41962 ns/op	 195.22 MB/s
 Search_Hard_CachedRE2/16K	   20000	     82460 ns/op	 198.69 MB/s
 Search_Hard_CachedRE2/32K	   10000	    164209 ns/op	 199.55 MB/s
 Search_Hard_CachedRE2/64K	    5000	    326354 ns/op	 200.81 MB/s
 Search_Hard_CachedRE2/128K	    5000	    659142 ns/op	 198.85 MB/s
 Search_Hard_CachedRE2/256K	    2000	   1333642 ns/op	 196.56 MB/s
 Search_Hard_CachedRE2/512K	    1000	   2687422 ns/op	 195.09 MB/s
 Search_Hard_CachedRE2/1M	     500	   5351592 ns/op	 195.94 MB/s
 Search_Hard_CachedRE2/2M	     100	  10581690 ns/op	 198.19 MB/s
 Search_Hard_CachedRE2/4M	     100	  21324320 ns/op	 196.69 MB/s
 Search_Hard_CachedRE2/8M	      50	  41892520 ns/op	 200.24 MB/s
 Search_Hard_CachedRE2/16M	      20	  85475700 ns/op	 196.28 MB/s
 Search_Parens_CachedPCRE/8	10000000	       298 ns/op	  26.80 MB/s
 Search_Parens_CachedRE2/8	 5000000	       562 ns/op	  14.21 MB/s
 Search_Parens_CachedRE2/16	 5000000	       598 ns/op	  26.71 MB/s
 Search_Parens_CachedRE2/32	 5000000	       676 ns/op	  47.27 MB/s
 Search_Parens_CachedRE2/64	 2000000	       828 ns/op	  77.21 MB/s
 Search_Parens_CachedRE2/128	 1000000	      1155 ns/op	 110.73 MB/s
 Search_Parens_CachedRE2/256	 1000000	      1788 ns/op	 143.13 MB/s
 Search_Parens_CachedRE2/512	  500000	      3064 ns/op	 167.09 MB/s
 Search_Parens_CachedRE2/1K	  500000	      5698 ns/op	 179.69 MB/s
 Search_Parens_CachedRE2/2K	  200000	     10961 ns/op	 186.84 MB/s
 Search_Parens_CachedRE2/4K	  100000	     21527 ns/op	 190.27 MB/s
 Search_Parens_CachedRE2/8K	   50000	     41923 ns/op	 195.40 MB/s
 Search_Parens_CachedRE2/16K	   20000	     85505 ns/op	 191.61 MB/s
 Search_Parens_CachedRE2/32K	   10000	    164437 ns/op	 199.27 MB/s
 Search_Parens_CachedRE2/64K	    5000	    332654 ns/op	 197.01 MB/s
 Search_Parens_CachedRE2/128K	    5000	    677745 ns/op	 193.39 MB/s
 Search_Parens_CachedRE2/256K	    2000	   1331012 ns/op	 196.95 MB/s
 Search_Parens_CachedRE2/512K	    1000	   2692594 ns/op	 194.71 MB/s
 Search_Parens_CachedRE2/1M	     500	   5355880 ns/op	 195.78 MB/s
 Search_Parens_CachedRE2/2M	     100	  10822340 ns/op	 193.78 MB/s
 Search_Parens_CachedRE2/4M	     100	  21464430 ns/op	 195.41 MB/s
 Search_Parens_CachedRE2/8M	      50	  42875940 ns/op	 195.65 MB/s
 Search_Parens_CachedRE2/16M	      20	  84654300 ns/op	 198.19 MB/s
 Search_BigFixed_CachedPCRE/8	 5000000	       360 ns/op	  22.21 MB/s
 Search_BigFixed_CachedPCRE/16	 5000000	       442 ns/op	  36.15 MB/s
 Search_BigFixed_CachedPCRE/32	 5000000	       606 ns/op	  52.73 MB/s
 Search_BigFixed_CachedPCRE/64	 2000000	       935 ns/op	  68.39 MB/s
 Search_BigFixed_CachedPCRE/128	 1000000	      1525 ns/op	  83.91 MB/s
 Search_BigFixed_CachedPCRE/256	 1000000	      2718 ns/op	  94.18 MB/s
 Search_BigFixed_CachedPCRE/512	  500000	      5020 ns/op	 101.98 MB/s
 Search_BigFixed_CachedPCRE/1K	  200000	      9761 ns/op	 104.90 MB/s
 Search_BigFixed_CachedPCRE/2K	  100000	     19275 ns/op	 106.25 MB/s
 Search_BigFixed_CachedPCRE/4K	   50000	     38488 ns/op	 106.42 MB/s
 Search_BigFixed_CachedPCRE/8K	   20000	     76229 ns/op	 107.46 MB/s
 Search_BigFixed_CachedPCRE/16K	   10000	    155350 ns/op	 105.46 MB/s
 Search_BigFixed_CachedPCRE/32K	    5000	    309242 ns/op	 105.96 MB/s
 Search_BigFixed_CachedRE2/8	10000000	       194 ns/op	  41.03 MB/s
 Search_BigFixed_CachedRE2/16	 5000000	       589 ns/op	  27.15 MB/s
 Search_BigFixed_CachedRE2/32	 5000000	       655 ns/op	  48.83 MB/s
 Search_BigFixed_CachedRE2/64	 5000000	       715 ns/op	  89.46 MB/s
 Search_BigFixed_CachedRE2/128	 2000000	       882 ns/op	 145.09 MB/s
 Search_BigFixed_CachedRE2/256	 1000000	      1293 ns/op	 197.97 MB/s
 Search_BigFixed_CachedRE2/512	 1000000	      1924 ns/op	 266.06 MB/s
 Search_BigFixed_CachedRE2/1K	  500000	      3294 ns/op	 310.79 MB/s
 Search_BigFixed_CachedRE2/2K	  500000	      6057 ns/op	 338.10 MB/s
 Search_BigFixed_CachedRE2/4K	  200000	     11475 ns/op	 356.93 MB/s
 Search_BigFixed_CachedRE2/8K	  100000	     22395 ns/op	 365.79 MB/s
 Search_BigFixed_CachedRE2/16K	   50000	     44333 ns/op	 369.56 MB/s
 Search_BigFixed_CachedRE2/32K	   20000	     88061 ns/op	 372.11 MB/s
 Search_BigFixed_CachedRE2/64K	   10000	    173649 ns/op	 377.40 MB/s
 Search_BigFixed_CachedRE2/128K	    5000	    347251 ns/op	 377.46 MB/s
 Search_BigFixed_CachedRE2/256K	    2000	    702561 ns/op	 373.13 MB/s
 Search_BigFixed_CachedRE2/512K	    1000	   1408041 ns/op	 372.35 MB/s
 Search_BigFixed_CachedRE2/1M	     500	   3003070 ns/op	 349.17 MB/s
 Search_Success_PCRE/8	  500000	      3891 ns/op	   2.06 MB/s
 Search_Success_PCRE/16	  500000	      3865 ns/op	   4.14 MB/s
 Search_Success_PCRE/32	  500000	      3861 ns/op	   8.29 MB/s
 Search_Success_PCRE/64	  500000	      3921 ns/op	  16.32 MB/s
 Search_Success_PCRE/128	  500000	      4677 ns/op	  27.37 MB/s
 Search_Success_PCRE/256	  500000	      5362 ns/op	  47.73 MB/s
 Search_Success_PCRE/512	  500000	      7125 ns/op	  71.85 MB/s
 Search_Success_PCRE/1K	  200000	     10643 ns/op	  96.21 MB/s
 Search_Success_PCRE/2K	  100000	     17620 ns/op	 116.23 MB/s
 Search_Success_PCRE/4K	   50000	     31657 ns/op	 129.39 MB/s
 Search_Success_PCRE/8K	   50000	     59290 ns/op	 138.17 MB/s
 Search_Success_PCRE/16K	   10000	    115346 ns/op	 142.04 MB/s
 Search_Success_PCRE/32K	   10000	    225258 ns/op	 145.47 MB/s
 Search_Success_PCRE/64K	    5000	    452994 ns/op	 144.67 MB/s
 Search_Success_PCRE/128K	    2000	    904745 ns/op	 144.87 MB/s
 Search_Success_PCRE/256K	    1000	   1786683 ns/op	 146.72 MB/s
 Search_Success_PCRE/512K	     500	   3600316 ns/op	 145.62 MB/s
 Search_Success_PCRE/1M	     200	   7413055 ns/op	 141.45 MB/s
 Search_Success_PCRE/2M	     100	  15261930 ns/op	 137.41 MB/s
 Search_Success_PCRE/4M	      50	  32827960 ns/op	 127.77 MB/s
 Search_Success_PCRE/8M	      20	  73886450 ns/op	 113.53 MB/s
 Search_Success_PCRE/16M	       5	 247881200 ns/op	  67.68 MB/s
 Search_Success_RE2/8	  100000	     18948 ns/op	   0.42 MB/s
 Search_Success_RE2/16	   50000	     40076 ns/op	   0.40 MB/s
 Search_Success_RE2/32	   50000	     40543 ns/op	   0.79 MB/s
 Search_Success_RE2/64	   50000	     40520 ns/op	   1.58 MB/s
 Search_Success_RE2/128	   50000	     41222 ns/op	   3.11 MB/s
 Search_Success_RE2/256	   50000	     41361 ns/op	   6.19 MB/s
 Search_Success_RE2/512	   50000	     42418 ns/op	  12.07 MB/s
 Search_Success_RE2/1K	   50000	     45239 ns/op	  22.64 MB/s
 Search_Success_RE2/2K	   50000	     50568 ns/op	  40.50 MB/s
 Search_Success_RE2/4K	   50000	     60722 ns/op	  67.45 MB/s
 Search_Success_RE2/8K	   20000	     82046 ns/op	  99.85 MB/s
 Search_Success_RE2/16K	   10000	    125412 ns/op	 130.64 MB/s
 Search_Success_RE2/32K	   10000	    211805 ns/op	 154.71 MB/s
 Search_Success_RE2/64K	    5000	    373132 ns/op	 175.64 MB/s
 Search_Success_RE2/128K	    2000	    710166 ns/op	 184.57 MB/s
 Search_Success_RE2/256K	    2000	   1392231 ns/op	 188.29 MB/s
 Search_Success_RE2/512K	    1000	   2763051 ns/op	 189.75 MB/s
 Search_Success_RE2/1M	     500	   5547628 ns/op	 189.01 MB/s
 Search_Success_RE2/2M	     100	  11709090 ns/op	 179.10 MB/s
 Search_Success_RE2/4M	      50	  25220160 ns/op	 166.31 MB/s
 Search_Success_RE2/8M	      20	  59411600 ns/op	 141.19 MB/s
 Search_Success_RE2/16M	       5	 219468600 ns/op	  76.44 MB/s
 Search_Success_CachedPCRE/8	 5000000	       328 ns/op	  24.35 MB/s
 Search_Success_CachedPCRE/16	 5000000	       389 ns/op	  41.06 MB/s
 Search_Success_CachedPCRE/32	 5000000	       507 ns/op	  63.11 MB/s
 Search_Success_CachedPCRE/64	 2000000	       754 ns/op	  84.80 MB/s
 Search_Success_CachedPCRE/128	 1000000	      1164 ns/op	 109.89 MB/s
 Search_Success_CachedPCRE/256	 1000000	      2051 ns/op	 124.81 MB/s
 Search_Success_CachedPCRE/512	  500000	      3831 ns/op	 133.64 MB/s
 Search_Success_CachedPCRE/1K	  500000	      7280 ns/op	 140.66 MB/s
 Search_Success_CachedPCRE/2K	  200000	     14254 ns/op	 143.67 MB/s
 Search_Success_CachedPCRE/4K	  100000	     28223 ns/op	 145.13 MB/s
 Search_Success_CachedPCRE/8K	   50000	     55445 ns/op	 147.75 MB/s
 Search_Success_CachedPCRE/16K	   10000	    112739 ns/op	 145.33 MB/s
 Search_Success_CachedPCRE/32K	   10000	    219943 ns/op	 148.98 MB/s
 Search_Success_CachedPCRE/64K	    5000	    440884 ns/op	 148.65 MB/s
 Search_Success_CachedPCRE/128K	    2000	    898950 ns/op	 145.81 MB/s
 Search_Success_CachedPCRE/256K	    1000	   1775905 ns/op	 147.61 MB/s
 Search_Success_CachedPCRE/512K	     500	   3579178 ns/op	 146.48 MB/s
 Search_Success_CachedPCRE/1M	     200	   7278075 ns/op	 144.07 MB/s
 Search_Success_CachedPCRE/2M	     100	  14954670 ns/op	 140.23 MB/s
 Search_Success_CachedPCRE/4M	      50	  31865060 ns/op	 131.63 MB/s
 Search_Success_CachedPCRE/8M	      20	  73977900 ns/op	 113.39 MB/s
 Search_Success_CachedPCRE/16M	       5	 250587400 ns/op	  66.95 MB/s
 Search_Success_CachedRE2/8	10000000	       206 ns/op	  38.80 MB/s
 Search_Success_CachedRE2/16	 5000000	       598 ns/op	  26.75 MB/s
 Search_Success_CachedRE2/32	 5000000	       675 ns/op	  47.39 MB/s
 Search_Success_CachedRE2/64	 2000000	       847 ns/op	  75.52 MB/s
 Search_Success_CachedRE2/128	 1000000	      1211 ns/op	 105.65 MB/s
 Search_Success_CachedRE2/256	 1000000	      1886 ns/op	 135.73 MB/s
 Search_Success_CachedRE2/512	  500000	      3123 ns/op	 163.90 MB/s
 Search_Success_CachedRE2/1K	  500000	      5754 ns/op	 177.94 MB/s
 Search_Success_CachedRE2/2K	  200000	     10929 ns/op	 187.38 MB/s
 Search_Success_CachedRE2/4K	  100000	     20887 ns/op	 196.10 MB/s
 Search_Success_CachedRE2/8K	   50000	     41295 ns/op	 198.37 MB/s
 Search_Success_CachedRE2/16K	   20000	     82338 ns/op	 198.98 MB/s
 Search_Success_CachedRE2/32K	   10000	    168893 ns/op	 194.02 MB/s
 Search_Success_CachedRE2/64K	    5000	    337449 ns/op	 194.21 MB/s
 Search_Success_CachedRE2/128K	    5000	    670247 ns/op	 195.56 MB/s
 Search_Success_CachedRE2/256K	    2000	   1342666 ns/op	 195.24 MB/s
 Search_Success_CachedRE2/512K	    1000	   2711677 ns/op	 193.34 MB/s
 Search_Success_CachedRE2/1M	     500	   5403052 ns/op	 194.07 MB/s
 Search_Success_CachedRE2/2M	     100	  11697250 ns/op	 179.29 MB/s
 Search_Success_CachedRE2/4M	      50	  24796680 ns/op	 169.15 MB/s
 Search_Success_CachedRE2/8M	      20	  59587450 ns/op	 140.78 MB/s
 Search_Success_CachedRE2/16M	       5	 225415400 ns/op	  74.43 MB/s
 Search_Success1_PCRE/8	  500000	      4063 ns/op	   1.97 MB/s
 Search_Success1_PCRE/16	  500000	      4104 ns/op	   3.90 MB/s
 Search_Success1_PCRE/32	  500000	      4162 ns/op	   7.69 MB/s
 Search_Success1_PCRE/64	  500000	      4284 ns/op	  14.94 MB/s
 Search_Success1_PCRE/128	  500000	      4857 ns/op	  26.35 MB/s
 Search_Success1_PCRE/256	  500000	      5507 ns/op	  46.48 MB/s
 Search_Success1_PCRE/512	  500000	      7203 ns/op	  71.08 MB/s
 Search_Success1_PCRE/1K	  200000	     10470 ns/op	  97.80 MB/s
 Search_Success1_PCRE/2K	  100000	     17455 ns/op	 117.33 MB/s
 Search_Success1_PCRE/4K	   50000	     31564 ns/op	 129.77 MB/s
 Search_Success1_PCRE/8K	   50000	     59112 ns/op	 138.58 MB/s
 Search_Success1_PCRE/16K	   10000	    115903 ns/op	 141.36 MB/s
 Search_Success1_PCRE/32K	   10000	    223311 ns/op	 146.74 MB/s
 Search_Success1_PCRE/64K	    5000	    447509 ns/op	 146.45 MB/s
 Search_Success1_PCRE/128K	    2000	    874543 ns/op	 149.87 MB/s
 Search_Success1_PCRE/256K	    1000	   1836342 ns/op	 142.75 MB/s
 Search_Success1_PCRE/512K	     500	   3636250 ns/op	 144.18 MB/s
 Search_Success1_PCRE/1M	     200	   7256345 ns/op	 144.50 MB/s
 Search_Success1_PCRE/2M	     100	  15093450 ns/op	 138.94 MB/s
 Search_Success1_PCRE/4M	      50	  32167920 ns/op	 130.39 MB/s
 Search_Success1_PCRE/8M	      20	  74735800 ns/op	 112.24 MB/s
 Search_Success1_PCRE/16M	       5	 252818600 ns/op	  66.36 MB/s
 Search_Success1_RE2/8	   50000	     51778 ns/op	   0.15 MB/s
 Search_Success1_RE2/16	   50000	     50754 ns/op	   0.32 MB/s
 Search_Success1_RE2/32	   50000	     51127 ns/op	   0.63 MB/s
 Search_Success1_RE2/64	   50000	     51305 ns/op	   1.25 MB/s
 Search_Success1_RE2/128	   50000	     51580 ns/op	   2.48 MB/s
 Search_Success1_RE2/256	   50000	     52019 ns/op	   4.92 MB/s
 Search_Success1_RE2/512	   50000	     53145 ns/op	   9.63 MB/s
 Search_Success1_RE2/1K	   50000	     55871 ns/op	  18.33 MB/s
 Search_Success1_RE2/2K	   50000	     61477 ns/op	  33.31 MB/s
 Search_Success1_RE2/4K	   50000	     71875 ns/op	  56.99 MB/s
 Search_Success1_RE2/8K	   20000	     94822 ns/op	  86.39 MB/s
 Search_Success1_RE2/16K	   10000	    137021 ns/op	 119.57 MB/s
 Search_Success1_RE2/32K	   10000	    220596 ns/op	 148.54 MB/s
 Search_Success1_RE2/64K	    5000	    377808 ns/op	 173.46 MB/s
 Search_Success1_RE2/128K	    5000	    707546 ns/op	 185.25 MB/s
 Search_Success1_RE2/256K	    2000	   1367308 ns/op	 191.72 MB/s
 Search_Success1_RE2/512K	    1000	   2729291 ns/op	 192.10 MB/s
 Search_Success1_RE2/1M	     500	   5439634 ns/op	 192.77 MB/s
 Search_Success1_RE2/2M	     100	  11626860 ns/op	 180.37 MB/s
 Search_Success1_RE2/4M	      50	  24603160 ns/op	 170.48 MB/s
 Search_Success1_RE2/8M	      20	  59001300 ns/op	 142.18 MB/s
 Search_Success1_RE2/16M	       5	 219520200 ns/op	  76.43 MB/s
 Search_Success1_Cached_PCRE/8	 5000000	       373 ns/op	  21.41 MB/s
 Search_Success1_Cached_PCRE/16	 5000000	       437 ns/op	  36.61 MB/s
 Search_Success1_Cached_PCRE/32	 5000000	       543 ns/op	  58.84 MB/s
 Search_Success1_Cached_PCRE/64	 2000000	       784 ns/op	  81.60 MB/s
 Search_Success1_Cached_PCRE/128	 1000000	      1193 ns/op	 107.29 MB/s
 Search_Success1_Cached_PCRE/256	 1000000	      2044 ns/op	 125.23 MB/s
 Search_Success1_Cached_PCRE/512	  500000	      3734 ns/op	 137.10 MB/s
 Search_Success1_Cached_PCRE/1K	  500000	      7121 ns/op	 143.78 MB/s
 Search_Success1_Cached_PCRE/2K	  200000	     13767 ns/op	 148.76 MB/s
 Search_Success1_Cached_PCRE/4K	  100000	     27176 ns/op	 150.72 MB/s
 Search_Success1_Cached_PCRE/8K	   50000	     54155 ns/op	 151.27 MB/s
 Search_Success1_Cached_PCRE/16K	   10000	    109309 ns/op	 149.89 MB/s
 Search_Success1_Cached_PCRE/32K	   10000	    215890 ns/op	 151.78 MB/s
 Search_Success1_Cached_PCRE/64K	    5000	    432550 ns/op	 151.51 MB/s
 Search_Success1_Cached_PCRE/128K	    2000	    870568 ns/op	 150.56 MB/s
 Search_Success1_Cached_PCRE/256K	    1000	   1756215 ns/op	 149.27 MB/s
 Search_Success1_Cached_PCRE/512K	     500	   3671994 ns/op	 142.78 MB/s
 Search_Success1_Cached_PCRE/1M	     200	   7134810 ns/op	 146.97 MB/s
 Search_Success1_Cached_PCRE/2M	     100	  14672580 ns/op	 142.93 MB/s
 Search_Success1_Cached_PCRE/4M	      50	  31146040 ns/op	 134.67 MB/s
 Search_Success1_Cached_PCRE/8M	      20	  72224500 ns/op	 116.15 MB/s
 Search_Success1_Cached_PCRE/16M	       5	 243683800 ns/op	  68.85 MB/s
 Search_Success1_Cached_RE2/8	 5000000	       544 ns/op	  14.69 MB/s
 Search_Success1_Cached_RE2/16	 5000000	       583 ns/op	  27.43 MB/s
 Search_Success1_Cached_RE2/32	 5000000	       661 ns/op	  48.37 MB/s
 Search_Success1_Cached_RE2/64	 2000000	       818 ns/op	  78.23 MB/s
 Search_Success1_Cached_RE2/128	 1000000	      1148 ns/op	 111.40 MB/s
 Search_Success1_Cached_RE2/256	 1000000	      1778 ns/op	 143.95 MB/s
 Search_Success1_Cached_RE2/512	  500000	      3036 ns/op	 168.64 MB/s
 Search_Success1_Cached_RE2/1K	  500000	      5549 ns/op	 184.53 MB/s
 Search_Success1_Cached_RE2/2K	  200000	     10580 ns/op	 193.56 MB/s
 Search_Success1_Cached_RE2/4K	  100000	     20645 ns/op	 198.39 MB/s
 Search_Success1_Cached_RE2/8K	   50000	     40775 ns/op	 200.90 MB/s
 Search_Success1_Cached_RE2/16K	   20000	     81030 ns/op	 202.20 MB/s
 Search_Success1_Cached_RE2/32K	   10000	    162338 ns/op	 201.85 MB/s
 Search_Success1_Cached_RE2/64K	    5000	    324387 ns/op	 202.03 MB/s
 Search_Success1_Cached_RE2/128K	    5000	    648468 ns/op	 202.13 MB/s
 Search_Success1_Cached_RE2/256K	    2000	   1299439 ns/op	 201.74 MB/s
 Search_Success1_Cached_RE2/512K	    1000	   2608958 ns/op	 200.96 MB/s
 Search_Success1_Cached_RE2/1M	     500	   5263964 ns/op	 199.20 MB/s
 Search_Success1_Cached_RE2/2M	     200	  10793175 ns/op	 194.30 MB/s
 Search_Success1_Cached_RE2/4M	      50	  24138120 ns/op	 173.76 MB/s
 Search_Success1_Cached_RE2/8M	      20	  58223300 ns/op	 144.08 MB/s
 Search_Success1_Cached_RE2/16M	       5	 215741400 ns/op	  77.77 MB/s
 Search_Digits_PCRE	  500000	      7534 ns/op
 Search_Digits_RE2	   50000	     44162 ns/op
 Parse_Digits_PCRE	  200000	      7664 ns/op
 Parse_Digits_RE2	  100000	     22595 ns/op
 Parse_CachedDigits_PCRE	 5000000	       721 ns/op
 Parse_CachedDigits_RE2	 5000000	       413 ns/op
 Parse_DigitDs_PCRE	  500000	      7095 ns/op
 Parse_DigitDs_RE2	  100000	     22259 ns/op
 Parse_CachedDigitDs_PCRE	 5000000	       704 ns/op
 Parse_CachedDigitDs_RE2	 5000000	       415 ns/op
 Parse_Split_PCRE	  500000	      5540 ns/op
 Parse_Split_RE2	  100000	     23817 ns/op
 Parse_CachedSplit_PCRE	 5000000	       490 ns/op
 Parse_CachedSplit_RE2	10000000	       251 ns/op
 Parse_SplitHard_PCRE	  500000	      5410 ns/op
 Parse_SplitHard_RE2	  100000	     28518 ns/op
 Parse_CachedSplitHard_PCRE	 5000000	       488 ns/op
 Parse_CachedSplitHard_RE2	 1000000	      2489 ns/op
 Parse_CachedSplitBig1_PCRE	     500	   7171752 ns/op
 Parse_CachedSplitBig1_RE2	    2000	    990722 ns/op
 Parse_CachedSplitBig2_PCRE	    5000	    658331 ns/op
 Parse_CachedSplitBig2_RE2	      20	  81205250 ns/op
 BM_PCRE_Compile	  500000	      6443 ns/op
 BM_RE2_Compile	  100000	     24103 ns/op
 SearchPhone_CachedPCRE/8	 1000000	      2010 ns/op	   3.98 MB/s
 SearchPhone_CachedPCRE/16	  500000	      3286 ns/op	   4.87 MB/s
 SearchPhone_CachedPCRE/32	  500000	      5953 ns/op	   5.37 MB/s
 SearchPhone_CachedPCRE/64	  200000	     11181 ns/op	   5.72 MB/s
 SearchPhone_CachedPCRE/128	  100000	     21634 ns/op	   5.92 MB/s
 SearchPhone_CachedPCRE/256	   50000	     42315 ns/op	   6.05 MB/s
 SearchPhone_CachedPCRE/512	   20000	     83969 ns/op	   6.10 MB/s
 SearchPhone_CachedPCRE/1K	   10000	    166005 ns/op	   6.17 MB/s
 SearchPhone_CachedPCRE/2K	    5000	    327433 ns/op	   6.25 MB/s
 SearchPhone_CachedPCRE/4K	    5000	    654794 ns/op	   6.26 MB/s
 SearchPhone_CachedPCRE/8K	    2000	   1302747 ns/op	   6.29 MB/s
 SearchPhone_CachedPCRE/16K	    1000	   2601137 ns/op	   6.30 MB/s
 SearchPhone_CachedPCRE/32K	     500	   5170166 ns/op	   6.34 MB/s
 SearchPhone_CachedPCRE/64K	     100	  10378910 ns/op	   6.31 MB/s
 SearchPhone_CachedPCRE/128K	     100	  20783360 ns/op	   6.31 MB/s
 SearchPhone_CachedPCRE/256K	      50	  41632940 ns/op	   6.30 MB/s
 SearchPhone_CachedPCRE/512K	      20	  83663300 ns/op	   6.27 MB/s
 SearchPhone_CachedPCRE/1M	      10	 167093400 ns/op	   6.28 MB/s
 SearchPhone_CachedPCRE/2M	       5	 335078800 ns/op	   6.26 MB/s
 SearchPhone_CachedPCRE/4M	       5	 673405400 ns/op	   6.23 MB/s
 SearchPhone_CachedPCRE/8M	       1	1335761000 ns/op	   6.28 MB/s
 SearchPhone_CachedPCRE/16M	       1	2682908000 ns/op	   6.25 MB/s
 SearchPhone_CachedRE2/8	 1000000	      1470 ns/op	   5.44 MB/s
 SearchPhone_CachedRE2/16	 1000000	      1496 ns/op	  10.69 MB/s
 SearchPhone_CachedRE2/32	 1000000	      1570 ns/op	  20.38 MB/s
 SearchPhone_CachedRE2/64	 1000000	      1770 ns/op	  36.15 MB/s
 SearchPhone_CachedRE2/128	 1000000	      2082 ns/op	  61.46 MB/s
 SearchPhone_CachedRE2/256	 1000000	      2701 ns/op	  94.78 MB/s
 SearchPhone_CachedRE2/512	  500000	      3963 ns/op	 129.19 MB/s
 SearchPhone_CachedRE2/1K	  500000	      6487 ns/op	 157.85 MB/s
 SearchPhone_CachedRE2/2K	  200000	     11527 ns/op	 177.67 MB/s
 SearchPhone_CachedRE2/4K	  100000	     21579 ns/op	 189.81 MB/s
 SearchPhone_CachedRE2/8K	   50000	     41804 ns/op	 195.96 MB/s
 SearchPhone_CachedRE2/16K	   20000	     82228 ns/op	 199.25 MB/s
 SearchPhone_CachedRE2/32K	   10000	    163444 ns/op	 200.48 MB/s
 SearchPhone_CachedRE2/64K	    5000	    325307 ns/op	 201.46 MB/s
 SearchPhone_CachedRE2/128K	    5000	    648559 ns/op	 202.10 MB/s
 SearchPhone_CachedRE2/256K	    2000	   1295574 ns/op	 202.34 MB/s
 SearchPhone_CachedRE2/512K	    1000	   2591267 ns/op	 202.33 MB/s
 SearchPhone_CachedRE2/1M	     500	   5178738 ns/op	 202.48 MB/s
 SearchPhone_CachedRE2/2M	     100	  10389680 ns/op	 201.85 MB/s
 SearchPhone_CachedRE2/4M	     100	  20851510 ns/op	 201.15 MB/s
 SearchPhone_CachedRE2/8M	      50	  41763800 ns/op	 200.86 MB/s
 SearchPhone_CachedRE2/16M	      20	  83492800 ns/op	 200.94 MB/s
 EmptyPartialMatchPCRE	10000000	       195 ns/op
 EmptyPartialMatchRE2	 5000000	       497 ns/op
 SimplePartialMatchPCRE	10000000	       276 ns/op
 SimplePartialMatchRE2	 5000000	       548 ns/op
 HTTPPartialMatchPCRE	 2000000	       826 ns/op
 HTTPPartialMatchRE2	 2000000	       894 ns/op
 SmallHTTPPartialMatchPCRE	 2000000	       825 ns/op
 SmallHTTPPartialMatchRE2	 2000000	       895 ns/op
 DotMatchPCRE	 2000000	       810 ns/op
 DotMatchRE2	 2000000	       976 ns/op
 ASCIIMatchPCRE	 5000000	       604 ns/op
 ASCIIMatchRE2	 2000000	       976 ns/op

1475

third_party/re2/benchlog/benchlog.r70 vendored

File diff suppressed because it is too large Load Diff

1058

third_party/re2/benchlog/benchlog.wreck vendored

File diff suppressed because it is too large Load Diff

									
										98

third_party/re2/benchlog/benchplot.py
									
										vendored
									
				@ -1,98 +0,0 @@

				#!/usr/bin/env python

				import argparse     # for ArgumentParser

				import subprocess   # for Popen

				import tempfile     # for NamedTemporaryFile

				import os           # for remove

				class gnuplot(object):
				    """
				    Collect RE2 benchlog results and plot them as a PNG through gnuplot.

				    Intended to be used as a context manager: the temporary csv files
				    written by gen_csv() are deleted when the ``with`` block exits.
				    """

				    # base name of the image; the script appends ".png" to it
				    output = "result.png"

				    # gnuplot preamble; "{}" is filled with `output` in run(), and one
				    # `template` entry per benchmark is appended after "plot "
				    script = """
				             set terminal png size 1024, 768
				             set output "{}.png"
				             set title "re2 benchlog"
				             set datafile separator ";"
				             set grid x y
				             set ylabel "MB/s"
				             set autoscale
				             plot """

				    # one plot clause per benchmark: csv column 1 is the row index, column 2
				    # the size label and column 5 the throughput field; the clause ends with
				    # ",\<newline>" (3 characters), which run() strips from the last entry
				    template = """'{}' using 1:5:xticlabels(2) with linespoints linewidth 3 title "{}",\\\n"""

				    def __init__(self):
				        # Per-instance state. These used to be mutable class attributes,
				        # which made every gnuplot object share one dict and one list.
				        self.benchdata = dict()
				        self.tempfiles = []

				    def __enter__(self):
				        return self

				    def __exit__(self, type, value, traceback):
				        """
				        remove all temporary files
				        """
				        for filename in self.tempfiles:
				            os.remove(filename)
				        # forget the deleted files so a second exit does not fail on them
				        del self.tempfiles[:]

				    def parse_re2_benchlog(self, filename):
				        """
				        parse the input benchlog and return a dictionary contain bench data

				        Only lines made of exactly four tab-separated fields are used
				        (name/size, iterations, ns/op, MB/s); other lines are ignored.
				        The result maps the benchmark name to a list of
				        [size, iterations, ns/op, MB/s] string rows and is also kept in
				        self.benchdata.
				        """
				        benchdata = self.benchdata
				        with open(filename) as f:
				            for raw in f:
				                data = raw.split('\t')
				                if len(data) != 4:
				                    continue
				                # "Name/size" -> ["Name", "size"], then strip every field
				                data = [field.strip() for field in data[0].split('/') + data[1:]]
				                benchdata.setdefault(data[0], []).append(data[1:])
				        return benchdata

				    def gen_csv(self):
				        """
				        generate temporary csv files

				        Writes one ';'-separated file per benchmark, records it for
				        cleanup and appends the matching plot clause to the script.
				        """
				        for name, data in self.benchdata.items():
				            with tempfile.NamedTemporaryFile(delete=False) as f:
				                for index, line in enumerate(data):
				                    f.write('{};{}\n'.format(index, ';'.join(line)).encode())
				                self.tempfiles.append(f.name)
				                self.script = self.script + self.template.format(f.name, name)

				    def run(self):
				        """
				        write the csv files and feed the finished script to gnuplot

				        Raises ValueError when nothing was parsed: without any plot clause
				        the [:-3] below would cut into the "plot" keyword itself.
				        """
				        if not self.benchdata:
				            raise ValueError('no benchmark data to plot')
				        self.gen_csv()
				        # drop the trailing ",\<newline>" left by the last template entry
				        script = self.script[:-3].format(self.output)
				        command = subprocess.Popen(['gnuplot'], stdin=subprocess.PIPE)
				        command.communicate(script.encode())

				# Command-line entry point: plot the benchlog named on the command line.
				if __name__ == '__main__':
				    parser = argparse.ArgumentParser(description='generate plots for benchlog')
				    parser.add_argument('benchlog', type=str, help='benchlog generated by re2')
				    args = parser.parse_args()

				    # Probe for the gnuplot binary before doing any work. OSError also
				    # covers FileNotFoundError, so other launch failures are reported too.
				    try:
				        probe = subprocess.Popen(['gnuplot'], stdin=subprocess.PIPE)
				    except OSError:
				        print('you can install "gnuplot" to generate plots automatically')
				        raise SystemExit(1)
				    # Close the probe's stdin and wait for it, so no idle gnuplot process
				    # is left behind while the real plot is produced.
				    probe.communicate()

				    with gnuplot() as plot:
				        # the image is written next to the input as "<benchlog>.png"
				        plot.output = args.benchlog
				        plot.parse_re2_benchlog(args.benchlog)
				        plot.run()

155

third_party/re2/benchlog/mktable vendored

 @ -1,155 +0,0 @@
 #!/usr/bin/perl
 # XXX
 # table(FORMAT): print an HTML table with one row per system in @sys,
 # showing the PCRE and RE2 timings of one benchmark in microseconds.
 # FORMAT is a sprintf pattern for the benchmark name; it is expanded once
 # with "PCRE" and once with "RE2" to find the two rows in %data.
 # No prototype: the old empty prototype "()" claimed the sub takes no
 # arguments although it reads one from @_.
 sub table {
 	my ($name) = @_;
 	print <<'EOF';
 <table border=0>
 <tr><th>System</th><th>PCRE</th><th>RE2</th></tr>
 EOF
 	foreach my $sys (@sys) {
 		my $ns_pcre = $data{$sys}->{sprintf($name, "PCRE")}->{'ns/op'};
 		my $ns_re2 = $data{$sys}->{sprintf($name, "RE2")}->{'ns/op'};
 		# benchlog values are ns/op; the table shows microseconds
 		printf "<tr><td>%s</td><td>%.1f µs</td><td>%.1f µs</td></tr>\n", $sysname{$sys}, $ns_pcre/1000., $ns_re2/1000.;
 	}
 	print <<'EOF';
 <tr height=5><td colspan=3></td></tr>
 </table>
 EOF
 }
 # Input-size suffixes of the benchmark names ("<name>/<size>"), in the
 # order they are laid out along the x axis by graph().
 @sizes = (
 	"8", "16", "32", "64", "128", "256", "512",
 	"1K", "2K", "4K", "8K", "16K", "32K", "64K", "128K", "256K", "512K",
 	"1M", "2M", "4M", "8M", "16M"
 );
 # Value written after "color" on each engine's jgraph curve
 # (presumably an R G B triple -- confirm against the jgraph manual).
 %color = (
 	"PCRE" => "0.7 0 0",
 	"RE2" => "0 0 1",
 );
 # Number of graphs generated so far; graph() increments it to number
 # its output files.
 $ngraph = 0;
 # graph(PREFIX): plot MB/s against input size for PCRE and RE2, using the
 # "wreck" benchlog. Benchmark rows are looked up as "<PREFIX><engine>/<size>".
 # Writes regexp3gN.jgr, converts it to .eps with jgraph and to .png with gs,
 # then prints an <img> tag for the PNG. N counts up with every call.
 # No prototype: the old empty prototype "()" claimed the sub takes no
 # arguments although it reads one from @_.
 sub graph {
 	my ($name) = @_;
 	my $sys = "wreck";
 	my $base = sprintf("regexp3g%d", ++$ngraph);
 	open(JGR, ">$base.jgr") || die "open >$base.jgr: $!";
 	printf JGR "bbox -20 -12 392 95\n";
 	printf JGR "newgraph clip x_translate 0.25 y_translate 0.25\n";
 	$ymax = 0;
 	%lastx = ();
 	%lasty = ();
 	foreach my $who ("PCRE", "RE2") {
 		printf JGR "newcurve pts\n";
 		for(my $i=0; $i<@sizes; $i++) {
 			my $key = sprintf("%s%s/%s", $name, $who, $sizes[$i]);
 			my $val = $data{$sys}->{$key}->{'MB/s'};
 			# sizes without a throughput figure are left out of the curve
 			next if !defined($val);
 			if($val > $ymax) {
 				$ymax = $val;
 			}
 			# remember the last plotted point; the engine label goes there
 			$lastx{$who} = $i;
 			$lasty{$who} = $val;
 			printf JGR "$i %f (* %s *)\n", $val, $key;
 		}
 		my $color = $color{$who};
 		printf JGR "marktype none color $color linethickness 2 linetype solid label : $who\n";
 	}
 	my $n = @sizes;
 	printf JGR "xaxis min -1 max $n size 5 label : text size (bytes)\n";
 	printf JGR "  no_auto_hash_marks hash_labels fontsize 9\n";
 	# label every third size
 	for($i=0; $i<@sizes; $i+=3) {
 		printf JGR "  hash_at $i hash_label at $i : $sizes[$i]\n";
 	}
 	# Choose the y-axis maximum: the largest power of ten not above $ymax,
 	# times the smallest multiplier in 2..10 that clears $ymax.
 	my $y = 1;
 	while(10*$y <= $ymax) {
 		$y = 10*$y;
 	}
 	for($i=2; $i<=10; $i++) {
 		if($i*$y > $ymax) {
 			$y = $i*$y;
 			last;
 		}
 	}
 	foreach my $who ("PCRE", "RE2") {
 		# an engine with no data points has no curve end to label
 		next if !defined($lastx{$who});
 		$x1 = $lastx{$who};
 		$y1 = $lasty{$who};
 		$x1 *= 1.01;
 		my $v = "vjc";
 		# lift labels that would sit in the bottom 5% of the axis
 		if($y1 < 0.05 * $y) {
 			$v = "vjb";
 			$y1 = 0.05 * $y;
 		}
 		printf JGR "newstring x $x1 y $y1 hjl $v : $who\n";
 	}
 	printf JGR "yaxis min 0 max $y size 1 label : speed (MB/s)\n";
 	printf JGR "  hash_labels fontsize 9\n";
 	# printf JGR "legend defaults font Times-Roman fontsize 10 x 0 y $y hjl vjt\n";
 	# Close explicitly so the whole file is on disk before jgraph reads it,
 	# instead of relying on Perl's best-effort flush before system().
 	close(JGR) || die "close $base.jgr: $!";
 	system("jgraph $base.jgr >$base.eps"); # die "system: $!";
 	system("gs -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dEPSCrop -sDEVICE=png16m -r100 -sOutputFile=$base.png -dBATCH -dQUIT -dQUIET -dNOPAUSE $base.eps");
 	printf "<img src=$base.png>\n"
 }
 # Discard input lines up to the next line that starts with "<!-- -->";
 # that marker line itself is echoed. Stops silently at end of input.
 sub skip() {
 	while(<>) {
 		next unless /^<!-- -->/;
 		print;
 		last;
 	}
 }
 # Systems whose "benchlog.<sys>" files are loaded; also the row order
 # used by table().
 @sys = ("r70", "c2", "wreck", "mini");
 # Description printed for each system in the first column by table().
 %sysname = (
 	"r70" => "AMD Opteron 8214 HE, 2.2 GHz",
 	"c2" => "Intel Core2 Duo E7200, 2.53 GHz",
 	"wreck" => "Intel Xeon 5150, 2.66 GHz (Mac Pro)",
 	"mini" => "Intel Core2 T5600, 1.83 GHz (Mac Mini)",
 );
 # Dispatch table: directive name found in a "<!-- benchlog NAME ... -->"
 # comment => generator to call.
 %func = (
 	"table" => \&table,
 	"graph" => \&graph,
 );
 # Load every benchlog.<sys> file into %data:
 #   $data{<sys>}->{<benchmark name>} = { name, iter, "ns/op" [, "MB/s"] }
 # Lines that do not look like a benchmark result are ignored.
 foreach my $sys (@sys) {
 	open(F, "benchlog.$sys") || die "open benchlog.$sys: $!";
 	my %sysdat;
 	while(<F>) {
 		next unless /^([A-Za-z0-9_\/]+)\s+(\d+)\s+(\d+) ns\/op/;
 		my %row = ("name" => $1, "iter" => $2, "ns/op" => $3);
 		# throughput is only reported for the size-parameterized benchmarks
 		$row{"MB/s"} = $1 if /([\d.]+) MB\/s/;
 		$sysdat{$row{"name"}} = \%row;
 	}
 	close F;
 	$data{$sys} = \%sysdat;
 }
 # Filter the input document to stdout. A line of the form
 #   <!-- benchlog NAME -->   or   <!-- benchlog NAME ARG -->
 # is echoed, the generator registered as NAME in %func is run to emit fresh
 # output, and the old generated text up to the closing "<!-- -->" line is
 # dropped by skip().
 while(<>) {
 	print;
 	if(/^<!-- benchlog (\w+) -->/) {
 		# fail with a clear message instead of calling an undefined code ref
 		die "unknown benchlog directive: $1\n" if !exists($func{$1});
 		$func{$1}();
 		skip();
 		next;
 	}
 	if(/^<!-- benchlog (\w+) ([%\w]+) -->/) {
 		die "unknown benchlog directive: $1\n" if !exists($func{$1});
 		$func{$1}($2);
 		skip();
 		next;
 	}
 }

1

third_party/re2/doc/README.xkcd vendored

				`@ -1 +0,0 @@`
				`xkcd.png is a cropped version of http://xkcd.com/208/`

41

third_party/re2/doc/mksyntaxgo vendored

 @ -1,41 +0,0 @@
 #!/bin/sh
 # Generate the Go regexp/syntax package documentation (doc.go) from
 # syntax.txt: copy the text into the Go tree, then run a sam edit script on
 # the copy that removes lines not wanted there and wraps the rest in a Go
 # package comment.
 set -e
 # Stop with a clear message when GOROOT is unset or empty, instead of
 # trying to write under /src.
 : "${GOROOT:?GOROOT must be set}"
 # Quoted so a GOROOT containing spaces is not split into several words.
 out="$GOROOT/src/regexp/syntax/doc.go"
 cp syntax.txt "$out"
 sam -d "$out" <<'!'
 ,x g/NOT SUPPORTED/d
 /^Unicode character class/,$d
 ,s/[«»]//g
 ,x g/^Possessive repetitions:/d
 ,x g/\\C/d
 ,x g/Flag syntax/d
 ,s/.=(true|false)/flag &/g
 ,s/^Flags:/  Flag syntax is xyz (set) or -xyz (clear) or xy-z (set xy, clear z). The flags are:\n/
 ,s/\n\n\n+/\n\n/g
 ,x/(^.*	.*\n)+/ | awk -F'	' '{printf("  %-14s %s\n", $1, $2)}'
 ,2c
 // Copyright 2012 The Go Authors.  All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // DO NOT EDIT. This file is generated by mksyntaxgo from the RE2 distribution.
 /*
 Package syntax parses regular expressions into parse trees and compiles
 parse trees into programs. Most clients of regular expressions will use the
 facilities of package regexp (such as Compile and Match) instead of this package.
 Syntax
 The regular expression syntax understood by this package when parsing with the Perl flag is as follows.
 Parts of the syntax can be disabled by passing alternate flags to Parse.
 .
 $a
 */
 package syntax
 .
 w
 q
 !

42

third_party/re2/doc/mksyntaxhtml vendored

 @ -1,42 +0,0 @@
 #!/bin/sh
 # Generate syntax.html from syntax.txt: copy the text, then run a sam edit
 # script on the copy that escapes HTML metacharacters, turns each
 # tab-separated line into a table row and adds the page header and footer.
 # Stop on the first failure (as mksyntaxgo does): if the copy fails, sam
 # must not re-process a stale, already converted syntax.html.
 set -e
 cp syntax.txt syntax.html
 sam -d syntax.html <<'!'
 ,s/\&/\&amp;/g
 ,s/</\&lt;/g
 ,s/>/\&gt;/g
 ,s!== (([^()]|\([^()]*\))*)!≡ <code>\1</code>!g
 ,s!«!<code>!g
 ,s!»!</code>!g
 ,s! vim$! <font size=-2>VIM</font>!g
 ,s! pcre$! <font size=-2>PCRE</font>!g
 ,s! perl$! <font size=-2>PERL</font>!g
 ,x g/NOT SUPPORTED/ s!^[^	]+!<font color=#808080>&</font>!
 ,s!NOT SUPPORTED!!g
 ,s!(^[^	]+)	(.*)\n!<tr><td><code>\1</code></td><td>\2</td></tr>\n!g
 ,s!.*:$!<b>&</b>!g
 ,s!^$!<tr><td></td></tr>!g
 ,x v/<tr>/ s!.*!<tr><td colspan=2>&</td></tr>!
 ,2c
 <html>
 <!-- AUTOMATICALLY GENERATED by mksyntaxhtml -->
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
 <title>RE2 regular expression syntax reference</title>
 </head>
 <body>
 <h1>RE2 regular expression syntax reference</h1>
 <table border=0 cellpadding=2 cellspacing=2>
 <tr><td colspan=2>This page lists the regular expression syntax accepted by RE2.</td></tr>
 <tr><td colspan=2>It also lists syntax accepted by PCRE, PERL, and VIM.</td></tr>
 <tr><td colspan=2>Grayed out expressions are not supported by RE2.</td></tr>
 .
 $a
 </table>
 </body>
 </html>
 .
 w
 q
 !

36

third_party/re2/doc/mksyntaxwiki vendored

 @ -1,36 +0,0 @@
 #!/bin/sh
 # Generate syntax.wiki from syntax.txt: copy the text, then run a sam edit
 # script on the copy that quotes the syntax column in backticks, turns each
 # tab-separated line into a table row and adds the wiki header and footer.
 # Stop on the first failure (as mksyntaxgo does): if the copy fails, sam
 # must not re-process a stale, already converted syntax.wiki.
 set -e
 cp syntax.txt syntax.wiki
 sam -d syntax.wiki <<'!'
 ,s!`!`````!g
 ,s!== (([^()]|\([^()]*\))*)!≡ `\1`!g
 ,s!«!`!g
 ,s!»!`!g
 ,s! vim$! <font size="1">VIM</font>!g
 ,s! pcre$! <font size="1">PCRE</font>!g
 ,s! perl$! <font size="1">PERL</font>!g
 ,s!(^[^	]+)	(.*)\n!`\1`	\2\n!g
 ,x g/NOT SUPPORTED/ s!^[^	]+!<font color="#808080">&</font>!
 ,s!NOT SUPPORTED!<font size="1">(&)</font>!g
 ,s!(^[^	]+)	(.*)\n!<tr><td>\1</td><td>\2</td></tr>\n!g
 ,s!.*:$!<b>&</b>!g
 ,s!^$!<tr><td></td></tr>!g
 ,x v/<tr>/ s!.*!<tr><td colspan="2">&</td></tr>!
 ,2c
 #summary I define UNIX as “30 definitions of regular expressions living under one roof.” —Don Knuth
 <wiki:comment>
 GENERATED BY mksyntaxwiki.  DO NOT EDIT
 </wiki:comment>
 <table border="0" cellpadding="2" cellspacing="2">
 <tr><td colspan="2">This page lists the regular expression syntax accepted by RE2.</td></tr>
 <tr><td colspan="2">It also lists syntax accepted by PCRE, PERL, and VIM.</td></tr>
 <tr><td colspan="2">Grayed out expressions are not supported by RE2.</td></tr>
 .
 $a
 </table>
 .
 w
 q
 !

									
										409

third_party/re2/doc/syntax.html
									
										vendored
									
				@ -1,409 +0,0 @@

				<html>

				<!-- AUTOMATICALLY GENERATED by mksyntaxhtml -->

				<head>

				<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>

				<title>RE2 regular expression syntax reference</title>

				</head>

				<body>

				<h1>RE2 regular expression syntax reference</h1>

				<table border=0 cellpadding=2 cellspacing=2>

				<tr><td colspan=2>This page lists the regular expression syntax accepted by RE2.</td></tr>

				<tr><td colspan=2>It also lists syntax accepted by PCRE, PERL, and VIM.</td></tr>

				<tr><td colspan=2>Grayed out expressions are not supported by RE2.</td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Single characters:</b></td></tr>

				<tr><td><code>.</code></td><td>any character, possibly including newline (s=true)</td></tr>

				<tr><td><code>[xyz]</code></td><td>character class</td></tr>

				<tr><td><code>[^xyz]</code></td><td>negated character class</td></tr>

				<tr><td><code>\d</code></td><td>Perl character class</td></tr>

				<tr><td><code>\D</code></td><td>negated Perl character class</td></tr>

				<tr><td><code>[[:alpha:]]</code></td><td>ASCII character class</td></tr>

				<tr><td><code>[[:^alpha:]]</code></td><td>negated ASCII character class</td></tr>

				<tr><td><code>\pN</code></td><td>Unicode character class (one-letter name)</td></tr>

				<tr><td><code>\p{Greek}</code></td><td>Unicode character class</td></tr>

				<tr><td><code>\PN</code></td><td>negated Unicode character class (one-letter name)</td></tr>

				<tr><td><code>\P{Greek}</code></td><td>negated Unicode character class</td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Composites:</b></td></tr>

				<tr><td><code>xy</code></td><td><code>x</code> followed by <code>y</code></td></tr>

				<tr><td><code>x|y</code></td><td><code>x</code> or <code>y</code> (prefer <code>x</code>)</td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Repetitions:</b></td></tr>

				<tr><td><code>x*</code></td><td>zero or more <code>x</code>, prefer more</td></tr>

				<tr><td><code>x+</code></td><td>one or more <code>x</code>, prefer more</td></tr>

				<tr><td><code>x?</code></td><td>zero or one <code>x</code>, prefer one</td></tr>

				<tr><td><code>x{n,m}</code></td><td><code>n</code> or <code>n</code>+1 or ... or <code>m</code> <code>x</code>, prefer more</td></tr>

				<tr><td><code>x{n,}</code></td><td><code>n</code> or more <code>x</code>, prefer more</td></tr>

				<tr><td><code>x{n}</code></td><td>exactly <code>n</code> <code>x</code></td></tr>

				<tr><td><code>x*?</code></td><td>zero or more <code>x</code>, prefer fewer</td></tr>

				<tr><td><code>x+?</code></td><td>one or more <code>x</code>, prefer fewer</td></tr>

				<tr><td><code>x??</code></td><td>zero or one <code>x</code>, prefer zero</td></tr>

				<tr><td><code>x{n,m}?</code></td><td><code>n</code> or <code>n</code>+1 or ... or <code>m</code> <code>x</code>, prefer fewer</td></tr>

				<tr><td><code>x{n,}?</code></td><td><code>n</code> or more <code>x</code>, prefer fewer</td></tr>

				<tr><td><code>x{n}?</code></td><td>exactly <code>n</code> <code>x</code></td></tr>

				<tr><td><code><font color=#808080>x{}</font></code></td><td>(≡ <code>x*</code>)  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>x{-}</font></code></td><td>(≡ <code>x*?</code>)  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>x{-n}</font></code></td><td>(≡ <code>x{n}?</code>)  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>x=</font></code></td><td>(≡ <code>x?</code>)  <font size=-2>VIM</font></td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Possessive repetitions:</b></td></tr>

				<tr><td><code><font color=#808080>x*+</font></code></td><td>zero or more <code>x</code>, possessive </td></tr>

				<tr><td><code><font color=#808080>x++</font></code></td><td>one or more <code>x</code>, possessive </td></tr>

				<tr><td><code><font color=#808080>x?+</font></code></td><td>zero or one <code>x</code>, possessive </td></tr>

				<tr><td><code><font color=#808080>x{n,m}+</font></code></td><td><code>n</code> or ... or <code>m</code> <code>x</code>, possessive </td></tr>

				<tr><td><code><font color=#808080>x{n,}+</font></code></td><td><code>n</code> or more <code>x</code>, possessive </td></tr>

				<tr><td><code><font color=#808080>x{n}+</font></code></td><td>exactly <code>n</code> <code>x</code>, possessive </td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Grouping:</b></td></tr>

				<tr><td><code>(re)</code></td><td>numbered capturing group</td></tr>

				<tr><td><code>(?P&lt;name&gt;re)</code></td><td>named &amp; numbered capturing group</td></tr>

				<tr><td><code><font color=#808080>(?&lt;name&gt;re)</font></code></td><td>named &amp; numbered capturing group </td></tr>

				<tr><td><code><font color=#808080>(?'name're)</font></code></td><td>named &amp; numbered capturing group </td></tr>

				<tr><td><code>(?:re)</code></td><td>non-capturing group</td></tr>

				<tr><td><code>(?flags)</code></td><td>set flags within current group; non-capturing</td></tr>

				<tr><td><code>(?flags:re)</code></td><td>set flags during re; non-capturing</td></tr>

				<tr><td><code><font color=#808080>(?#text)</font></code></td><td>comment </td></tr>

				<tr><td><code><font color=#808080>(?|x|y|z)</font></code></td><td>branch numbering reset </td></tr>

				<tr><td><code><font color=#808080>(?&gt;re)</font></code></td><td>possessive match of <code>re</code> </td></tr>

				<tr><td><code><font color=#808080>re@&gt;</font></code></td><td>possessive match of <code>re</code>  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>%(re)</font></code></td><td>non-capturing group  <font size=-2>VIM</font></td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Flags:</b></td></tr>

				<tr><td><code>i</code></td><td>case-insensitive (default false)</td></tr>

				<tr><td><code>m</code></td><td>multi-line mode: <code>^</code> and <code>$</code> match begin/end line in addition to begin/end text (default false)</td></tr>

				<tr><td><code>s</code></td><td>let <code>.</code> match <code>\n</code> (default false)</td></tr>

				<tr><td><code>U</code></td><td>ungreedy: swap meaning of <code>x*</code> and <code>x*?</code>, <code>x+</code> and <code>x+?</code>, etc (default false)</td></tr>

				<tr><td colspan=2>Flag syntax is <code>xyz</code> (set) or <code>-xyz</code> (clear) or <code>xy-z</code> (set <code>xy</code>, clear <code>z</code>).</td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Empty strings:</b></td></tr>

				<tr><td><code>^</code></td><td>at beginning of text or line (<code>m</code>=true)</td></tr>

				<tr><td><code>$</code></td><td>at end of text (like <code>\z</code> not <code>\Z</code>) or line (<code>m</code>=true)</td></tr>

				<tr><td><code>\A</code></td><td>at beginning of text</td></tr>

				<tr><td><code>\b</code></td><td>at word boundary (<code>\w</code> on one side and <code>\W</code>, <code>\A</code>, or <code>\z</code> on the other)</td></tr>

				<tr><td><code>\B</code></td><td>not a word boundary</td></tr>

				<tr><td><code><font color=#808080>\G</font></code></td><td>at beginning of subtext being searched  <font size=-2>PCRE</font></td></tr>

				<tr><td><code><font color=#808080>\G</font></code></td><td>at end of last match  <font size=-2>PERL</font></td></tr>

				<tr><td><code><font color=#808080>\Z</font></code></td><td>at end of text, or before newline at end of text </td></tr>

				<tr><td><code>\z</code></td><td>at end of text</td></tr>

				<tr><td><code><font color=#808080>(?=re)</font></code></td><td>before text matching <code>re</code> </td></tr>

				<tr><td><code><font color=#808080>(?!re)</font></code></td><td>before text not matching <code>re</code> </td></tr>

				<tr><td><code><font color=#808080>(?&lt;=re)</font></code></td><td>after text matching <code>re</code> </td></tr>

				<tr><td><code><font color=#808080>(?&lt;!re)</font></code></td><td>after text not matching <code>re</code> </td></tr>

				<tr><td><code><font color=#808080>re&amp;</font></code></td><td>before text matching <code>re</code>  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>re@=</font></code></td><td>before text matching <code>re</code>  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>re@!</font></code></td><td>before text not matching <code>re</code>  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>re@&lt;=</font></code></td><td>after text matching <code>re</code>  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>re@&lt;!</font></code></td><td>after text not matching <code>re</code>  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\zs</font></code></td><td>sets start of match (= \K)  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\ze</font></code></td><td>sets end of match  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\%^</font></code></td><td>beginning of file  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\%$</font></code></td><td>end of file  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\%V</font></code></td><td>on screen  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\%#</font></code></td><td>cursor position  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\%'m</font></code></td><td>mark <code>m</code> position  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\%23l</font></code></td><td>in line 23  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\%23c</font></code></td><td>in column 23  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\%23v</font></code></td><td>in virtual column 23  <font size=-2>VIM</font></td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Escape sequences:</b></td></tr>

				<tr><td><code>\a</code></td><td>bell (≡ <code>\007</code>)</td></tr>

				<tr><td><code>\f</code></td><td>form feed (≡ <code>\014</code>)</td></tr>

				<tr><td><code>\t</code></td><td>horizontal tab (≡ <code>\011</code>)</td></tr>

				<tr><td><code>\n</code></td><td>newline (≡ <code>\012</code>)</td></tr>

				<tr><td><code>\r</code></td><td>carriage return (≡ <code>\015</code>)</td></tr>

				<tr><td><code>\v</code></td><td>vertical tab character (≡ <code>\013</code>)</td></tr>

				<tr><td><code>\*</code></td><td>literal <code>*</code>, for any punctuation character <code>*</code></td></tr>

				<tr><td><code>\123</code></td><td>octal character code (up to three digits)</td></tr>

				<tr><td><code>\x7F</code></td><td>hex character code (exactly two digits)</td></tr>

				<tr><td><code>\x{10FFFF}</code></td><td>hex character code</td></tr>

				<tr><td><code>\C</code></td><td>match a single byte even in UTF-8 mode</td></tr>

				<tr><td><code>\Q...\E</code></td><td>literal text <code>...</code> even if <code>...</code> has punctuation</td></tr>

				<tr><td></td></tr>

				<tr><td><code><font color=#808080>\1</font></code></td><td>backreference </td></tr>

				<tr><td><code><font color=#808080>\b</font></code></td><td>backspace  (use <code>\010</code>)</td></tr>

				<tr><td><code><font color=#808080>\cK</font></code></td><td>control char ^K  (use <code>\013</code> etc)</td></tr>

				<tr><td><code><font color=#808080>\e</font></code></td><td>escape  (use <code>\033</code>)</td></tr>

				<tr><td><code><font color=#808080>\g1</font></code></td><td>backreference </td></tr>

				<tr><td><code><font color=#808080>\g{1}</font></code></td><td>backreference </td></tr>

				<tr><td><code><font color=#808080>\g{+1}</font></code></td><td>backreference </td></tr>

				<tr><td><code><font color=#808080>\g{-1}</font></code></td><td>backreference </td></tr>

				<tr><td><code><font color=#808080>\g{name}</font></code></td><td>named backreference </td></tr>

				<tr><td><code><font color=#808080>\g&lt;name&gt;</font></code></td><td>subroutine call </td></tr>

				<tr><td><code><font color=#808080>\g'name'</font></code></td><td>subroutine call </td></tr>

				<tr><td><code><font color=#808080>\k&lt;name&gt;</font></code></td><td>named backreference </td></tr>

				<tr><td><code><font color=#808080>\k'name'</font></code></td><td>named backreference </td></tr>

				<tr><td><code><font color=#808080>\lX</font></code></td><td>lowercase <code>X</code> </td></tr>

				<tr><td><code><font color=#808080>\ux</font></code></td><td>uppercase <code>x</code> </td></tr>

				<tr><td><code><font color=#808080>\L...\E</font></code></td><td>lowercase text <code>...</code> </td></tr>

				<tr><td><code><font color=#808080>\K</font></code></td><td>reset beginning of <code>$0</code> </td></tr>

				<tr><td><code><font color=#808080>\N{name}</font></code></td><td>named Unicode character </td></tr>

				<tr><td><code><font color=#808080>\R</font></code></td><td>line break </td></tr>

				<tr><td><code><font color=#808080>\U...\E</font></code></td><td>upper case text <code>...</code> </td></tr>

				<tr><td><code><font color=#808080>\X</font></code></td><td>extended Unicode sequence </td></tr>

				<tr><td></td></tr>

				<tr><td><code><font color=#808080>\%d123</font></code></td><td>decimal character 123  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\%xFF</font></code></td><td>hex character FF  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\%o123</font></code></td><td>octal character 123  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\%u1234</font></code></td><td>Unicode character 0x1234  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\%U12345678</font></code></td><td>Unicode character 0x12345678  <font size=-2>VIM</font></td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Character class elements:</b></td></tr>

				<tr><td><code>x</code></td><td>single character</td></tr>

				<tr><td><code>A-Z</code></td><td>character range (inclusive)</td></tr>

				<tr><td><code>\d</code></td><td>Perl character class</td></tr>

				<tr><td><code>[:foo:]</code></td><td>ASCII character class <code>foo</code></td></tr>

				<tr><td><code>\p{Foo}</code></td><td>Unicode character class <code>Foo</code></td></tr>

				<tr><td><code>\pF</code></td><td>Unicode character class <code>F</code> (one-letter name)</td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Named character classes as character class elements:</b></td></tr>

				<tr><td><code>[\d]</code></td><td>digits (≡ <code>\d</code>)</td></tr>

				<tr><td><code>[^\d]</code></td><td>not digits (≡ <code>\D</code>)</td></tr>

				<tr><td><code>[\D]</code></td><td>not digits (≡ <code>\D</code>)</td></tr>

				<tr><td><code>[^\D]</code></td><td>not not digits (≡ <code>\d</code>)</td></tr>

				<tr><td><code>[[:name:]]</code></td><td>named ASCII class inside character class (≡ <code>[:name:]</code>)</td></tr>

				<tr><td><code>[^[:name:]]</code></td><td>named ASCII class inside negated character class (≡ <code>[:^name:]</code>)</td></tr>

				<tr><td><code>[\p{Name}]</code></td><td>named Unicode property inside character class (≡ <code>\p{Name}</code>)</td></tr>

				<tr><td><code>[^\p{Name}]</code></td><td>named Unicode property inside negated character class (≡ <code>\P{Name}</code>)</td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Perl character classes (all ASCII-only):</b></td></tr>

				<tr><td><code>\d</code></td><td>digits (≡ <code>[0-9]</code>)</td></tr>

				<tr><td><code>\D</code></td><td>not digits (≡ <code>[^0-9]</code>)</td></tr>

				<tr><td><code>\s</code></td><td>whitespace (≡ <code>[\t\n\f\r ]</code>)</td></tr>

				<tr><td><code>\S</code></td><td>not whitespace (≡ <code>[^\t\n\f\r ]</code>)</td></tr>

				<tr><td><code>\w</code></td><td>word characters (≡ <code>[0-9A-Za-z_]</code>)</td></tr>

				<tr><td><code>\W</code></td><td>not word characters (≡ <code>[^0-9A-Za-z_]</code>)</td></tr>

				<tr><td></td></tr>

				<tr><td><code><font color=#808080>\h</font></code></td><td>horizontal space </td></tr>

				<tr><td><code><font color=#808080>\H</font></code></td><td>not horizontal space </td></tr>

				<tr><td><code><font color=#808080>\v</font></code></td><td>vertical space </td></tr>

				<tr><td><code><font color=#808080>\V</font></code></td><td>not vertical space </td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>ASCII character classes:</b></td></tr>

				<tr><td><code>[[:alnum:]]</code></td><td>alphanumeric (≡ <code>[0-9A-Za-z]</code>)</td></tr>

				<tr><td><code>[[:alpha:]]</code></td><td>alphabetic (≡ <code>[A-Za-z]</code>)</td></tr>

				<tr><td><code>[[:ascii:]]</code></td><td>ASCII (≡ <code>[\x00-\x7F]</code>)</td></tr>

				<tr><td><code>[[:blank:]]</code></td><td>blank (≡ <code>[\t ]</code>)</td></tr>

				<tr><td><code>[[:cntrl:]]</code></td><td>control (≡ <code>[\x00-\x1F\x7F]</code>)</td></tr>

				<tr><td><code>[[:digit:]]</code></td><td>digits (≡ <code>[0-9]</code>)</td></tr>

				<tr><td><code>[[:graph:]]</code></td><td>graphical (≡ <code>[!-~] == [A-Za-z0-9!"#$%&amp;'()*+,\-./:;&lt;=&gt;?@[\\\]^_`{|}~]</code>)</td></tr>

				<tr><td><code>[[:lower:]]</code></td><td>lower case (≡ <code>[a-z]</code>)</td></tr>

				<tr><td><code>[[:print:]]</code></td><td>printable (≡ <code>[ -~] == [ [:graph:]]</code>)</td></tr>

				<tr><td><code>[[:punct:]]</code></td><td>punctuation (≡ <code>[!-/:-@[-`{-~]</code>)</td></tr>

				<tr><td><code>[[:space:]]</code></td><td>whitespace (≡ <code>[\t\n\v\f\r ]</code>)</td></tr>

				<tr><td><code>[[:upper:]]</code></td><td>upper case (≡ <code>[A-Z]</code>)</td></tr>

				<tr><td><code>[[:word:]]</code></td><td>word characters (≡ <code>[0-9A-Za-z_]</code>)</td></tr>

				<tr><td><code>[[:xdigit:]]</code></td><td>hex digit (≡ <code>[0-9A-Fa-f]</code>)</td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Unicode character class names--general category:</b></td></tr>

				<tr><td><code>C</code></td><td>other</td></tr>

				<tr><td><code>Cc</code></td><td>control</td></tr>

				<tr><td><code>Cf</code></td><td>format</td></tr>

				<tr><td><code><font color=#808080>Cn</font></code></td><td>unassigned code points </td></tr>

				<tr><td><code>Co</code></td><td>private use</td></tr>

				<tr><td><code>Cs</code></td><td>surrogate</td></tr>

				<tr><td><code>L</code></td><td>letter</td></tr>

				<tr><td><code><font color=#808080>LC</font></code></td><td>cased letter </td></tr>

				<tr><td><code><font color=#808080>L&amp;</font></code></td><td>cased letter </td></tr>

				<tr><td><code>Ll</code></td><td>lowercase letter</td></tr>

				<tr><td><code>Lm</code></td><td>modifier letter</td></tr>

				<tr><td><code>Lo</code></td><td>other letter</td></tr>

				<tr><td><code>Lt</code></td><td>titlecase letter</td></tr>

				<tr><td><code>Lu</code></td><td>uppercase letter</td></tr>

				<tr><td><code>M</code></td><td>mark</td></tr>

				<tr><td><code>Mc</code></td><td>spacing mark</td></tr>

				<tr><td><code>Me</code></td><td>enclosing mark</td></tr>

				<tr><td><code>Mn</code></td><td>non-spacing mark</td></tr>

				<tr><td><code>N</code></td><td>number</td></tr>

				<tr><td><code>Nd</code></td><td>decimal number</td></tr>

				<tr><td><code>Nl</code></td><td>letter number</td></tr>

				<tr><td><code>No</code></td><td>other number</td></tr>

				<tr><td><code>P</code></td><td>punctuation</td></tr>

				<tr><td><code>Pc</code></td><td>connector punctuation</td></tr>

				<tr><td><code>Pd</code></td><td>dash punctuation</td></tr>

				<tr><td><code>Pe</code></td><td>close punctuation</td></tr>

				<tr><td><code>Pf</code></td><td>final punctuation</td></tr>

				<tr><td><code>Pi</code></td><td>initial punctuation</td></tr>

				<tr><td><code>Po</code></td><td>other punctuation</td></tr>

				<tr><td><code>Ps</code></td><td>open punctuation</td></tr>

				<tr><td><code>S</code></td><td>symbol</td></tr>

				<tr><td><code>Sc</code></td><td>currency symbol</td></tr>

				<tr><td><code>Sk</code></td><td>modifier symbol</td></tr>

				<tr><td><code>Sm</code></td><td>math symbol</td></tr>

				<tr><td><code>So</code></td><td>other symbol</td></tr>

				<tr><td><code>Z</code></td><td>separator</td></tr>

				<tr><td><code>Zl</code></td><td>line separator</td></tr>

				<tr><td><code>Zp</code></td><td>paragraph separator</td></tr>

				<tr><td><code>Zs</code></td><td>space separator</td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Unicode character class names--scripts:</b></td></tr>

				<tr><td><code>Arabic</code></td><td>Arabic</td></tr>

				<tr><td><code>Armenian</code></td><td>Armenian</td></tr>

				<tr><td><code>Balinese</code></td><td>Balinese</td></tr>

				<tr><td><code>Bamum</code></td><td>Bamum</td></tr>

				<tr><td><code>Batak</code></td><td>Batak</td></tr>

				<tr><td><code>Bengali</code></td><td>Bengali</td></tr>

				<tr><td><code>Bopomofo</code></td><td>Bopomofo</td></tr>

				<tr><td><code>Brahmi</code></td><td>Brahmi</td></tr>

				<tr><td><code>Braille</code></td><td>Braille</td></tr>

				<tr><td><code>Buginese</code></td><td>Buginese</td></tr>

				<tr><td><code>Buhid</code></td><td>Buhid</td></tr>

				<tr><td><code>Canadian_Aboriginal</code></td><td>Canadian Aboriginal</td></tr>

				<tr><td><code>Carian</code></td><td>Carian</td></tr>

				<tr><td><code>Chakma</code></td><td>Chakma</td></tr>

				<tr><td><code>Cham</code></td><td>Cham</td></tr>

				<tr><td><code>Cherokee</code></td><td>Cherokee</td></tr>

				<tr><td><code>Common</code></td><td>characters not specific to one script</td></tr>

				<tr><td><code>Coptic</code></td><td>Coptic</td></tr>

				<tr><td><code>Cuneiform</code></td><td>Cuneiform</td></tr>

				<tr><td><code>Cypriot</code></td><td>Cypriot</td></tr>

				<tr><td><code>Cyrillic</code></td><td>Cyrillic</td></tr>

				<tr><td><code>Deseret</code></td><td>Deseret</td></tr>

				<tr><td><code>Devanagari</code></td><td>Devanagari</td></tr>

				<tr><td><code>Egyptian_Hieroglyphs</code></td><td>Egyptian Hieroglyphs</td></tr>

				<tr><td><code>Ethiopic</code></td><td>Ethiopic</td></tr>

				<tr><td><code>Georgian</code></td><td>Georgian</td></tr>

				<tr><td><code>Glagolitic</code></td><td>Glagolitic</td></tr>

				<tr><td><code>Gothic</code></td><td>Gothic</td></tr>

				<tr><td><code>Greek</code></td><td>Greek</td></tr>

				<tr><td><code>Gujarati</code></td><td>Gujarati</td></tr>

				<tr><td><code>Gurmukhi</code></td><td>Gurmukhi</td></tr>

				<tr><td><code>Han</code></td><td>Han</td></tr>

				<tr><td><code>Hangul</code></td><td>Hangul</td></tr>

				<tr><td><code>Hanunoo</code></td><td>Hanunoo</td></tr>

				<tr><td><code>Hebrew</code></td><td>Hebrew</td></tr>

				<tr><td><code>Hiragana</code></td><td>Hiragana</td></tr>

				<tr><td><code>Imperial_Aramaic</code></td><td>Imperial Aramaic</td></tr>

				<tr><td><code>Inherited</code></td><td>inherit script from previous character</td></tr>

				<tr><td><code>Inscriptional_Pahlavi</code></td><td>Inscriptional Pahlavi</td></tr>

				<tr><td><code>Inscriptional_Parthian</code></td><td>Inscriptional Parthian</td></tr>

				<tr><td><code>Javanese</code></td><td>Javanese</td></tr>

				<tr><td><code>Kaithi</code></td><td>Kaithi</td></tr>

				<tr><td><code>Kannada</code></td><td>Kannada</td></tr>

				<tr><td><code>Katakana</code></td><td>Katakana</td></tr>

				<tr><td><code>Kayah_Li</code></td><td>Kayah Li</td></tr>

				<tr><td><code>Kharoshthi</code></td><td>Kharoshthi</td></tr>

				<tr><td><code>Khmer</code></td><td>Khmer</td></tr>

				<tr><td><code>Lao</code></td><td>Lao</td></tr>

				<tr><td><code>Latin</code></td><td>Latin</td></tr>

				<tr><td><code>Lepcha</code></td><td>Lepcha</td></tr>

				<tr><td><code>Limbu</code></td><td>Limbu</td></tr>

				<tr><td><code>Linear_B</code></td><td>Linear B</td></tr>

				<tr><td><code>Lycian</code></td><td>Lycian</td></tr>

				<tr><td><code>Lydian</code></td><td>Lydian</td></tr>

				<tr><td><code>Malayalam</code></td><td>Malayalam</td></tr>

				<tr><td><code>Mandaic</code></td><td>Mandaic</td></tr>

				<tr><td><code>Meetei_Mayek</code></td><td>Meetei Mayek</td></tr>

				<tr><td><code>Meroitic_Cursive</code></td><td>Meroitic Cursive</td></tr>

				<tr><td><code>Meroitic_Hieroglyphs</code></td><td>Meroitic Hieroglyphs</td></tr>

				<tr><td><code>Miao</code></td><td>Miao</td></tr>

				<tr><td><code>Mongolian</code></td><td>Mongolian</td></tr>

				<tr><td><code>Myanmar</code></td><td>Myanmar</td></tr>

				<tr><td><code>New_Tai_Lue</code></td><td>New Tai Lue (aka Simplified Tai Lue)</td></tr>

				<tr><td><code>Nko</code></td><td>Nko</td></tr>

				<tr><td><code>Ogham</code></td><td>Ogham</td></tr>

				<tr><td><code>Ol_Chiki</code></td><td>Ol Chiki</td></tr>

				<tr><td><code>Old_Italic</code></td><td>Old Italic</td></tr>

				<tr><td><code>Old_Persian</code></td><td>Old Persian</td></tr>

				<tr><td><code>Old_South_Arabian</code></td><td>Old South Arabian</td></tr>

				<tr><td><code>Old_Turkic</code></td><td>Old Turkic</td></tr>

				<tr><td><code>Oriya</code></td><td>Oriya</td></tr>

				<tr><td><code>Osmanya</code></td><td>Osmanya</td></tr>

				<tr><td><code>Phags_Pa</code></td><td>'Phags Pa</td></tr>

				<tr><td><code>Phoenician</code></td><td>Phoenician</td></tr>

				<tr><td><code>Rejang</code></td><td>Rejang</td></tr>

				<tr><td><code>Runic</code></td><td>Runic</td></tr>

				<tr><td><code>Saurashtra</code></td><td>Saurashtra</td></tr>

				<tr><td><code>Sharada</code></td><td>Sharada</td></tr>

				<tr><td><code>Shavian</code></td><td>Shavian</td></tr>

				<tr><td><code>Sinhala</code></td><td>Sinhala</td></tr>

				<tr><td><code>Sora_Sompeng</code></td><td>Sora Sompeng</td></tr>

				<tr><td><code>Sundanese</code></td><td>Sundanese</td></tr>

				<tr><td><code>Syloti_Nagri</code></td><td>Syloti Nagri</td></tr>

				<tr><td><code>Syriac</code></td><td>Syriac</td></tr>

				<tr><td><code>Tagalog</code></td><td>Tagalog</td></tr>

				<tr><td><code>Tagbanwa</code></td><td>Tagbanwa</td></tr>

				<tr><td><code>Tai_Le</code></td><td>Tai Le</td></tr>

				<tr><td><code>Tai_Tham</code></td><td>Tai Tham</td></tr>

				<tr><td><code>Tai_Viet</code></td><td>Tai Viet</td></tr>

				<tr><td><code>Takri</code></td><td>Takri</td></tr>

				<tr><td><code>Tamil</code></td><td>Tamil</td></tr>

				<tr><td><code>Telugu</code></td><td>Telugu</td></tr>

				<tr><td><code>Thaana</code></td><td>Thaana</td></tr>

				<tr><td><code>Thai</code></td><td>Thai</td></tr>

				<tr><td><code>Tibetan</code></td><td>Tibetan</td></tr>

				<tr><td><code>Tifinagh</code></td><td>Tifinagh</td></tr>

				<tr><td><code>Ugaritic</code></td><td>Ugaritic</td></tr>

				<tr><td><code>Vai</code></td><td>Vai</td></tr>

				<tr><td><code>Yi</code></td><td>Yi</td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Vim character classes:</b></td></tr>

				<tr><td><code><font color=#808080>\i</font></code></td><td>identifier character  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\I</font></code></td><td><code>\i</code> except digits  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\k</font></code></td><td>keyword character  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\K</font></code></td><td><code>\k</code> except digits  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\f</font></code></td><td>file name character  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\F</font></code></td><td><code>\f</code> except digits  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\p</font></code></td><td>printable character  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\P</font></code></td><td><code>\p</code> except digits  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\s</font></code></td><td>whitespace character (≡ <code>[ \t]</code>)  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\S</font></code></td><td>non-white space character (≡ <code>[^ \t]</code>)  <font size=-2>VIM</font></td></tr>

				<tr><td><code>\d</code></td><td>digits (≡ <code>[0-9]</code>) <font size=-2>VIM</font></td></tr>

				<tr><td><code>\D</code></td><td>not <code>\d</code> <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\x</font></code></td><td>hex digits (≡ <code>[0-9A-Fa-f]</code>)  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\X</font></code></td><td>not <code>\x</code>  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\o</font></code></td><td>octal digits (≡ <code>[0-7]</code>)  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\O</font></code></td><td>not <code>\o</code>  <font size=-2>VIM</font></td></tr>

				<tr><td><code>\w</code></td><td>word character <font size=-2>VIM</font></td></tr>

				<tr><td><code>\W</code></td><td>not <code>\w</code> <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\h</font></code></td><td>head of word character  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\H</font></code></td><td>not <code>\h</code>  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\a</font></code></td><td>alphabetic  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\A</font></code></td><td>not <code>\a</code>  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\l</font></code></td><td>lowercase  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\L</font></code></td><td>not lowercase  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\u</font></code></td><td>uppercase  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\U</font></code></td><td>not uppercase  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\_x</font></code></td><td><code>\x</code> plus newline, for any <code>x</code>  <font size=-2>VIM</font></td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Vim flags:</b></td></tr>

				<tr><td><code><font color=#808080>\c</font></code></td><td>ignore case  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\C</font></code></td><td>match case  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\m</font></code></td><td>magic  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\M</font></code></td><td>nomagic  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\v</font></code></td><td>verymagic  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\V</font></code></td><td>verynomagic  <font size=-2>VIM</font></td></tr>

				<tr><td><code><font color=#808080>\Z</font></code></td><td>ignore differences in Unicode combining characters  <font size=-2>VIM</font></td></tr>

				<tr><td></td></tr>

				<tr><td colspan=2><b>Magic:</b></td></tr>

				<tr><td><code><font color=#808080>(?{code})</font></code></td><td>arbitrary Perl code  <font size=-2>PERL</font></td></tr>

				<tr><td><code><font color=#808080>(??{code})</font></code></td><td>postponed arbitrary Perl code  <font size=-2>PERL</font></td></tr>

				<tr><td><code><font color=#808080>(?n)</font></code></td><td>recursive call to regexp capturing group <code>n</code> </td></tr>

				<tr><td><code><font color=#808080>(?+n)</font></code></td><td>recursive call to relative group <code>+n</code> </td></tr>

				<tr><td><code><font color=#808080>(?-n)</font></code></td><td>recursive call to relative group <code>-n</code> </td></tr>

				<tr><td><code><font color=#808080>(?C)</font></code></td><td>PCRE callout  <font size=-2>PCRE</font></td></tr>

				<tr><td><code><font color=#808080>(?R)</font></code></td><td>recursive call to entire regexp (≡ <code>(?0)</code>) </td></tr>

				<tr><td><code><font color=#808080>(?&amp;name)</font></code></td><td>recursive call to named group </td></tr>

				<tr><td><code><font color=#808080>(?P=name)</font></code></td><td>named backreference </td></tr>

				<tr><td><code><font color=#808080>(?P&gt;name)</font></code></td><td>recursive call to named group </td></tr>

				<tr><td><code><font color=#808080>(?(cond)true|false)</font></code></td><td>conditional branch </td></tr>

				<tr><td><code><font color=#808080>(?(cond)true)</font></code></td><td>conditional branch </td></tr>

				<tr><td><code><font color=#808080>(*ACCEPT)</font></code></td><td>make regexps more like Prolog </td></tr>

				<tr><td><code><font color=#808080>(*COMMIT)</font></code></td><td></td></tr>

				<tr><td><code><font color=#808080>(*F)</font></code></td><td></td></tr>

				<tr><td><code><font color=#808080>(*FAIL)</font></code></td><td></td></tr>

				<tr><td><code><font color=#808080>(*MARK)</font></code></td><td></td></tr>

				<tr><td><code><font color=#808080>(*PRUNE)</font></code></td><td></td></tr>

				<tr><td><code><font color=#808080>(*SKIP)</font></code></td><td></td></tr>

				<tr><td><code><font color=#808080>(*THEN)</font></code></td><td></td></tr>

				<tr><td><code><font color=#808080>(*ANY)</font></code></td><td>set newline convention </td></tr>

				<tr><td><code><font color=#808080>(*ANYCRLF)</font></code></td><td></td></tr>

				<tr><td><code><font color=#808080>(*CR)</font></code></td><td></td></tr>

				<tr><td><code><font color=#808080>(*CRLF)</font></code></td><td></td></tr>

				<tr><td><code><font color=#808080>(*LF)</font></code></td><td></td></tr>

				<tr><td><code><font color=#808080>(*BSR_ANYCRLF)</font></code></td><td>set \R convention  <font size=-2>PCRE</font></td></tr>

				<tr><td><code><font color=#808080>(*BSR_UNICODE)</font></code></td><td> <font size=-2>PCRE</font></td></tr>

				<tr><td></td></tr>

				</table>

				</body>

				</html>

399

third_party/re2/doc/syntax.txt vendored

@@ -1,399 +0,0 @@
 RE2 regular expression syntax reference
 -------------------------------------
 Single characters:
 .	any character, possibly including newline (s=true)
 [xyz]	character class
 [^xyz]	negated character class
 \d	Perl character class
 \D	negated Perl character class
 [[:alpha:]]	ASCII character class
 [[:^alpha:]]	negated ASCII character class
 \pN	Unicode character class (one-letter name)
 \p{Greek}	Unicode character class
 \PN	negated Unicode character class (one-letter name)
 \P{Greek}	negated Unicode character class
 Composites:
 xy	«x» followed by «y»
 x|y	«x» or «y» (prefer «x»)
 Repetitions:
 x*	zero or more «x», prefer more
 x+	one or more «x», prefer more
 x?	zero or one «x», prefer one
 x{n,m}	«n» or «n»+1 or ... or «m» «x», prefer more
 x{n,}	«n» or more «x», prefer more
 x{n}	exactly «n» «x»
 x*?	zero or more «x», prefer fewer
 x+?	one or more «x», prefer fewer
 x??	zero or one «x», prefer zero
 x{n,m}?	«n» or «n»+1 or ... or «m» «x», prefer fewer
 x{n,}?	«n» or more «x», prefer fewer
 x{n}?	exactly «n» «x»
 x{}	(== x*) NOT SUPPORTED vim
 x{-}	(== x*?) NOT SUPPORTED vim
 x{-n}	(== x{n}?) NOT SUPPORTED vim
 x=	(== x?) NOT SUPPORTED vim
 Implementation restriction: The counting forms «x{n,m}», «x{n,}», and «x{n}»
 reject forms that create a minimum or maximum repetition count above 1000.
 Unlimited repetitions are not subject to this restriction.
 Possessive repetitions:
 x*+	zero or more «x», possessive NOT SUPPORTED
 x++	one or more «x», possessive NOT SUPPORTED
 x?+	zero or one «x», possessive NOT SUPPORTED
 x{n,m}+	«n» or ... or «m» «x», possessive NOT SUPPORTED
 x{n,}+	«n» or more «x», possessive NOT SUPPORTED
 x{n}+	exactly «n» «x», possessive NOT SUPPORTED
 Grouping:
 (re)	numbered capturing group (submatch)
 (?P<name>re)	named & numbered capturing group (submatch)
 (?<name>re)	named & numbered capturing group (submatch) NOT SUPPORTED
 (?'name're)	named & numbered capturing group (submatch) NOT SUPPORTED
 (?:re)	non-capturing group
 (?flags)	set flags within current group; non-capturing
 (?flags:re)	set flags during re; non-capturing
 (?#text)	comment NOT SUPPORTED
 (?|x|y|z)	branch numbering reset NOT SUPPORTED
 (?>re)	possessive match of «re» NOT SUPPORTED
 re@>	possessive match of «re» NOT SUPPORTED vim
 %(re)	non-capturing group NOT SUPPORTED vim
 Flags:
 i	case-insensitive (default false)
 m	multi-line mode: «^» and «$» match begin/end line in addition to begin/end text (default false)
 s	let «.» match «\n» (default false)
 U	ungreedy: swap meaning of «x*» and «x*?», «x+» and «x+?», etc (default false)
 Flag syntax is «xyz» (set) or «-xyz» (clear) or «xy-z» (set «xy», clear «z»).
 Empty strings:
 ^	at beginning of text or line («m»=true)
 $	at end of text (like «\z» not «\Z») or line («m»=true)
 \A	at beginning of text
 \b	at ASCII word boundary («\w» on one side and «\W», «\A», or «\z» on the other)
 \B	not at ASCII word boundary
 \G	at beginning of subtext being searched NOT SUPPORTED pcre
 \G	at end of last match NOT SUPPORTED perl
 \Z	at end of text, or before newline at end of text NOT SUPPORTED
 \z	at end of text
 (?=re)	before text matching «re» NOT SUPPORTED
 (?!re)	before text not matching «re» NOT SUPPORTED
 (?<=re)	after text matching «re» NOT SUPPORTED
 (?<!re)	after text not matching «re» NOT SUPPORTED
 re&	before text matching «re» NOT SUPPORTED vim
 re@=	before text matching «re» NOT SUPPORTED vim
 re@!	before text not matching «re» NOT SUPPORTED vim
 re@<=	after text matching «re» NOT SUPPORTED vim
 re@<!	after text not matching «re» NOT SUPPORTED vim
 \zs	sets start of match (= \K) NOT SUPPORTED vim
 \ze	sets end of match NOT SUPPORTED vim
 \%^	beginning of file NOT SUPPORTED vim
 \%$	end of file NOT SUPPORTED vim
 \%V	on screen NOT SUPPORTED vim
 \%#	cursor position NOT SUPPORTED vim
 \%'m	mark «m» position NOT SUPPORTED vim
 \%23l	in line 23 NOT SUPPORTED vim
 \%23c	in column 23 NOT SUPPORTED vim
 \%23v	in virtual column 23 NOT SUPPORTED vim
 Escape sequences:
 \a	bell (== \007)
 \f	form feed (== \014)
 \t	horizontal tab (== \011)
 \n	newline (== \012)
 \r	carriage return (== \015)
 \v	vertical tab character (== \013)
 \*	literal «*», for any punctuation character «*»
 \123	octal character code (up to three digits)
 \x7F	hex character code (exactly two digits)
 \x{10FFFF}	hex character code
 \C	match a single byte even in UTF-8 mode
 \Q...\E	literal text «...» even if «...» has punctuation
 \1	backreference NOT SUPPORTED
 \b	backspace NOT SUPPORTED (use «\010»)
 \cK	control char ^K NOT SUPPORTED (use «\013» etc)
 \e	escape NOT SUPPORTED (use «\033»)
 \g1	backreference NOT SUPPORTED
 \g{1}	backreference NOT SUPPORTED
 \g{+1}	backreference NOT SUPPORTED
 \g{-1}	backreference NOT SUPPORTED
 \g{name}	named backreference NOT SUPPORTED
 \g<name>	subroutine call NOT SUPPORTED
 \g'name'	subroutine call NOT SUPPORTED
 \k<name>	named backreference NOT SUPPORTED
 \k'name'	named backreference NOT SUPPORTED
 \lX	lowercase «X» NOT SUPPORTED
 \ux	uppercase «x» NOT SUPPORTED
 \L...\E	lowercase text «...» NOT SUPPORTED
 \K	reset beginning of «$0» NOT SUPPORTED
 \N{name}	named Unicode character NOT SUPPORTED
 \R	line break NOT SUPPORTED
 \U...\E	upper case text «...» NOT SUPPORTED
 \X	extended Unicode sequence NOT SUPPORTED
 \%d123	decimal character 123 NOT SUPPORTED vim
 \%xFF	hex character FF NOT SUPPORTED vim
 \%o123	octal character 123 NOT SUPPORTED vim
 \%u1234	Unicode character 0x1234 NOT SUPPORTED vim
 \%U12345678	Unicode character 0x12345678 NOT SUPPORTED vim
 Character class elements:
 x	single character
 A-Z	character range (inclusive)
 \d	Perl character class
 [:foo:]	ASCII character class «foo»
 \p{Foo}	Unicode character class «Foo»
 \pF	Unicode character class «F» (one-letter name)
 Named character classes as character class elements:
 [\d]	digits (== \d)
 [^\d]	not digits (== \D)
 [\D]	not digits (== \D)
 [^\D]	not not digits (== \d)
 [[:name:]]	named ASCII class inside character class (== [:name:])
 [^[:name:]]	named ASCII class inside negated character class (== [:^name:])
 [\p{Name}]	named Unicode property inside character class (== \p{Name})
 [^\p{Name}]	named Unicode property inside negated character class (== \P{Name})
 Perl character classes (all ASCII-only):
 \d	digits (== [0-9])
 \D	not digits (== [^0-9])
 \s	whitespace (== [\t\n\f\r ])
 \S	not whitespace (== [^\t\n\f\r ])
 \w	word characters (== [0-9A-Za-z_])
 \W	not word characters (== [^0-9A-Za-z_])
 \h	horizontal space NOT SUPPORTED
 \H	not horizontal space NOT SUPPORTED
 \v	vertical space NOT SUPPORTED
 \V	not vertical space NOT SUPPORTED
 ASCII character classes:
 [[:alnum:]]	alphanumeric (== [0-9A-Za-z])
 [[:alpha:]]	alphabetic (== [A-Za-z])
 [[:ascii:]]	ASCII (== [\x00-\x7F])
 [[:blank:]]	blank (== [\t ])
 [[:cntrl:]]	control (== [\x00-\x1F\x7F])
 [[:digit:]]	digits (== [0-9])
 [[:graph:]]	graphical (== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])
 [[:lower:]]	lower case (== [a-z])
 [[:print:]]	printable (== [ -~] == [ [:graph:]])
 [[:punct:]]	punctuation (== [!-/:-@[-`{-~])
 [[:space:]]	whitespace (== [\t\n\v\f\r ])
 [[:upper:]]	upper case (== [A-Z])
 [[:word:]]	word characters (== [0-9A-Za-z_])
 [[:xdigit:]]	hex digit (== [0-9A-Fa-f])
 Unicode character class names--general category:
 C	other
 Cc	control
 Cf	format
 Cn	unassigned code points NOT SUPPORTED
 Co	private use
 Cs	surrogate
 L	letter
 LC	cased letter NOT SUPPORTED
 L&	cased letter NOT SUPPORTED
 Ll	lowercase letter
 Lm	modifier letter
 Lo	other letter
 Lt	titlecase letter
 Lu	uppercase letter
 M	mark
 Mc	spacing mark
 Me	enclosing mark
 Mn	non-spacing mark
 N	number
 Nd	decimal number
 Nl	letter number
 No	other number
 P	punctuation
 Pc	connector punctuation
 Pd	dash punctuation
 Pe	close punctuation
 Pf	final punctuation
 Pi	initial punctuation
 Po	other punctuation
 Ps	open punctuation
 S	symbol
 Sc	currency symbol
 Sk	modifier symbol
 Sm	math symbol
 So	other symbol
 Z	separator
 Zl	line separator
 Zp	paragraph separator
 Zs	space separator
 Unicode character class names--scripts:
 Arabic	Arabic
 Armenian	Armenian
 Balinese	Balinese
 Bamum	Bamum
 Batak	Batak
 Bengali	Bengali
 Bopomofo	Bopomofo
 Brahmi	Brahmi
 Braille	Braille
 Buginese	Buginese
 Buhid	Buhid
 Canadian_Aboriginal	Canadian Aboriginal
 Carian	Carian
 Chakma	Chakma
 Cham	Cham
 Cherokee	Cherokee
 Common	characters not specific to one script
 Coptic	Coptic
 Cuneiform	Cuneiform
 Cypriot	Cypriot
 Cyrillic	Cyrillic
 Deseret	Deseret
 Devanagari	Devanagari
 Egyptian_Hieroglyphs	Egyptian Hieroglyphs
 Ethiopic	Ethiopic
 Georgian	Georgian
 Glagolitic	Glagolitic
 Gothic	Gothic
 Greek	Greek
 Gujarati	Gujarati
 Gurmukhi	Gurmukhi
 Han	Han
 Hangul	Hangul
 Hanunoo	Hanunoo
 Hebrew	Hebrew
 Hiragana	Hiragana
 Imperial_Aramaic	Imperial Aramaic
 Inherited	inherit script from previous character
 Inscriptional_Pahlavi	Inscriptional Pahlavi
 Inscriptional_Parthian	Inscriptional Parthian
 Javanese	Javanese
 Kaithi	Kaithi
 Kannada	Kannada
 Katakana	Katakana
 Kayah_Li	Kayah Li
 Kharoshthi	Kharoshthi
 Khmer	Khmer
 Lao	Lao
 Latin	Latin
 Lepcha	Lepcha
 Limbu	Limbu
 Linear_B	Linear B
 Lycian	Lycian
 Lydian	Lydian
 Malayalam	Malayalam
 Mandaic	Mandaic
 Meetei_Mayek	Meetei Mayek
 Meroitic_Cursive	Meroitic Cursive
 Meroitic_Hieroglyphs	Meroitic Hieroglyphs
 Miao	Miao
 Mongolian	Mongolian
 Myanmar	Myanmar
 New_Tai_Lue	New Tai Lue (aka Simplified Tai Lue)
 Nko	Nko
 Ogham	Ogham
 Ol_Chiki	Ol Chiki
 Old_Italic	Old Italic
 Old_Persian	Old Persian
 Old_South_Arabian	Old South Arabian
 Old_Turkic	Old Turkic
 Oriya	Oriya
 Osmanya	Osmanya
 Phags_Pa	'Phags Pa
 Phoenician	Phoenician
 Rejang	Rejang
 Runic	Runic
 Saurashtra	Saurashtra
 Sharada	Sharada
 Shavian	Shavian
 Sinhala	Sinhala
 Sora_Sompeng	Sora Sompeng
 Sundanese	Sundanese
 Syloti_Nagri	Syloti Nagri
 Syriac	Syriac
 Tagalog	Tagalog
 Tagbanwa	Tagbanwa
 Tai_Le	Tai Le
 Tai_Tham	Tai Tham
 Tai_Viet	Tai Viet
 Takri	Takri
 Tamil	Tamil
 Telugu	Telugu
 Thaana	Thaana
 Thai	Thai
 Tibetan	Tibetan
 Tifinagh	Tifinagh
 Ugaritic	Ugaritic
 Vai	Vai
 Yi	Yi
 Vim character classes:
 \i	identifier character NOT SUPPORTED vim
 \I	«\i» except digits NOT SUPPORTED vim
 \k	keyword character NOT SUPPORTED vim
 \K	«\k» except digits NOT SUPPORTED vim
 \f	file name character NOT SUPPORTED vim
 \F	«\f» except digits NOT SUPPORTED vim
 \p	printable character NOT SUPPORTED vim
 \P	«\p» except digits NOT SUPPORTED vim
 \s	whitespace character (== [ \t]) NOT SUPPORTED vim
 \S	non-white space character (== [^ \t]) NOT SUPPORTED vim
 \d	digits (== [0-9]) vim
 \D	not «\d» vim
 \x	hex digits (== [0-9A-Fa-f]) NOT SUPPORTED vim
 \X	not «\x» NOT SUPPORTED vim
 \o	octal digits (== [0-7]) NOT SUPPORTED vim
 \O	not «\o» NOT SUPPORTED vim
 \w	word character vim
 \W	not «\w» vim
 \h	head of word character NOT SUPPORTED vim
 \H	not «\h» NOT SUPPORTED vim
 \a	alphabetic NOT SUPPORTED vim
 \A	not «\a» NOT SUPPORTED vim
 \l	lowercase NOT SUPPORTED vim
 \L	not lowercase NOT SUPPORTED vim
 \u	uppercase NOT SUPPORTED vim
 \U	not uppercase NOT SUPPORTED vim
 \_x	«\x» plus newline, for any «x» NOT SUPPORTED vim
 Vim flags:
 \c	ignore case NOT SUPPORTED vim
 \C	match case NOT SUPPORTED vim
 \m	magic NOT SUPPORTED vim
 \M	nomagic NOT SUPPORTED vim
 \v	verymagic NOT SUPPORTED vim
 \V	verynomagic NOT SUPPORTED vim
 \Z	ignore differences in Unicode combining characters NOT SUPPORTED vim
 Magic:
 (?{code})	arbitrary Perl code NOT SUPPORTED perl
 (??{code})	postponed arbitrary Perl code NOT SUPPORTED perl
 (?n)	recursive call to regexp capturing group «n» NOT SUPPORTED
 (?+n)	recursive call to relative group «+n» NOT SUPPORTED
 (?-n)	recursive call to relative group «-n» NOT SUPPORTED
 (?C)	PCRE callout NOT SUPPORTED pcre
 (?R)	recursive call to entire regexp (== (?0)) NOT SUPPORTED
 (?&name)	recursive call to named group NOT SUPPORTED
 (?P=name)	named backreference NOT SUPPORTED
 (?P>name)	recursive call to named group NOT SUPPORTED
 (?(cond)true|false)	conditional branch NOT SUPPORTED
 (?(cond)true)	conditional branch NOT SUPPORTED
 (*ACCEPT)	make regexps more like Prolog NOT SUPPORTED
 (*COMMIT)	NOT SUPPORTED
 (*F)	NOT SUPPORTED
 (*FAIL)	NOT SUPPORTED
 (*MARK)	NOT SUPPORTED
 (*PRUNE)	NOT SUPPORTED
 (*SKIP)	NOT SUPPORTED
 (*THEN)	NOT SUPPORTED
 (*ANY)	set newline convention NOT SUPPORTED
 (*ANYCRLF)	NOT SUPPORTED
 (*CR)	NOT SUPPORTED
 (*CRLF)	NOT SUPPORTED
 (*LF)	NOT SUPPORTED
 (*BSR_ANYCRLF)	set \R convention NOT SUPPORTED pcre
 (*BSR_UNICODE)	NOT SUPPORTED pcre

BIN
third_party/re2/doc/xkcd.png vendored

Binary file not shown.

Before

(image error) Size: 26 KiB

104

third_party/re2/lib/git/commit-msg.hook vendored

 @ -1,104 +0,0 @@
 #!/bin/sh
 # From Gerrit Code Review 2.2.1
 #
 # Part of Gerrit Code Review (http://code.google.com/p/gerrit/)
 #
 # Copyright (C) 2009 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 CHANGE_ID_AFTER="Bug|Issue"
 MSG="$1"
 # Check for, and add if missing, a unique Change-Id
 #
 # Reads the commit message file named by $MSG.  Does nothing when the message
 # is effectively empty or already carries a Change-Id line; otherwise rewrites
 # $MSG in place with a "Change-Id: I<hash>" footer line added.
 add_ChangeId() {
 	# Build the "clean" message: cut everything from the first line starting
 	# with "diff --git a/" onwards, drop Signed-off-by and '#' lines, then
 	# let `git stripspace` normalise the remaining whitespace.
 	clean_message=`sed -e '
 		/^diff --git a\/.*/{
 			s///
 			q
 		}
 		/^Signed-off-by:/d
 		/^#/d
 	' "$MSG" | git stripspace`
 	# Nothing left after cleaning: leave the message file untouched.
 	if test -z "$clean_message"
 	then
 		return
 	fi
 	# A Change-Id line (matched case-insensitively) is already present.
 	if grep -i '^Change-Id:' "$MSG" >/dev/null
 	then
 		return
 	fi
 	# _gen_ChangeId reads $clean_message set above.
 	id=`_gen_ChangeId`
 	# The Perl program scans the message from the bottom to find the footer
 	# (trailing "Token: value" lines), then splices "Change-Id: I$id" into the
 	# footer after any leading lines whose token matches $CHANGE_ID_AFTER, and
 	# writes the result back to $MSG.
 	perl -e '
 		$MSG = shift;
 		$id = shift;
 		$CHANGE_ID_AFTER = shift;
 		undef $/;
 		open(I, $MSG); $_ = <I>; close I;
 		s|^diff --git a/.*||ms;
 		s|^#.*$||mg;
 		exit unless $_;
 		@message = split /\n/;
 		$haveFooter = 0;
 		$startFooter = @message;
 		for($line = @message - 1; $line >= 0; $line--) {
 			$_ = $message[$line];
 			if (/^[a-zA-Z0-9-]+:/ && !m,^[a-z0-9-]+://,) {
 				$haveFooter++;
 				next;
 			}
 			next if /^[ []/;
 			$startFooter = $line if ($haveFooter && /^\r?$/);
 			last;
 		}
 		@footer = @message[$startFooter+1..@message];
 		@message = @message[0..$startFooter];
 		push(@footer, "") unless @footer;
 		for ($line = 0; $line < @footer; $line++) {
 			$_ = $footer[$line];
 			next if /^($CHANGE_ID_AFTER):/i;
 			last;
 		}
 		splice(@footer, $line, 0, "Change-Id: I$id");
 		$_ = join("\n", @message, @footer);
 		open(O, ">$MSG"); print O; close O;
 	' "$MSG" "$id" "$CHANGE_ID_AFTER"
 }
 # Writes to stdout the text that is hashed to form the Change-Id: the tree,
 # the parent commit (only when HEAD resolves), the author and committer
 # identities, a blank line, and finally $clean_message.
 _gen_ChangeIdInput() {
 	echo "tree $(git write-tree)"
 	# No parent line is emitted when HEAD cannot be resolved.
 	parent=$(git rev-parse HEAD^0 2>/dev/null) &&
 		echo "parent $parent"
 	echo "author $(git var GIT_AUTHOR_IDENT)"
 	echo "committer $(git var GIT_COMMITTER_IDENT)"
 	echo
 	printf '%s' "$clean_message"
 }
 # Prints the hash used as the Change-Id: the output of _gen_ChangeIdInput is
 # piped to `git hash-object -t commit --stdin`.
 _gen_ChangeId() {
 	_gen_ChangeIdInput |
 	git hash-object -t commit --stdin
 }
 add_ChangeId

18

third_party/re2/libre2.symbols vendored

 @ -1,18 +0,0 @@
 {
 	global:
 		# re2::RE2*
 		_ZN3re23RE2*;
 		_ZNK3re23RE2*;
 		# re2::StringPiece*
 		_ZN3re211StringPiece*;
 		_ZNK3re211StringPiece*;
 		# operator<<(std::ostream&, re2::StringPiece const&)
 		_ZlsRSoRKN3re211StringPieceE;
 		# re2::FilteredRE2*
 		_ZN3re211FilteredRE2*;
 		_ZNK3re211FilteredRE2*;
 		# flags
 		_ZN3re2*FLAGS_*;
 	local:
 		*;
 };

19

third_party/re2/libre2.symbols.darwin vendored

 @ -1,19 +0,0 @@
 # Linker doesn't like these unmangled:
 # re2::RE2*
 __ZN3re23RE2*
 __ZNK3re23RE2*
 # re2::StringPiece*
 __ZN3re211StringPiece*
 __ZNK3re211StringPiece*
 # operator<<(std::ostream&, re2::StringPiece const&)
 # Seen with libstdc++ on 10.8 and below:
 # __ZlsRSoRKN3re211StringPieceE
 # Seen with libc++ on 10.9 and above:
 # __ZlsRNSt3__113basic_ostreamIcNS_11char_traitsIcEEEERKN3re211StringPieceE
 # Note that "ls" means operator<<, so this is not overly broad.
 __Zls*RKN3re211StringPieceE
 # re2::FilteredRE2*
 __ZN3re211FilteredRE2*
 __ZNK3re211FilteredRE2*
 # flags
 __ZN3re2*FLAGS_*

98

third_party/re2/re2.gyp vendored

 @ -11,12 +11,12 @@
       'target_name': 're2',
       'type': 'static_library',
       'include_dirs': [
         '.',
         'src',
         '<(DEPTH)',
       ],
       'direct_dependent_settings': {
         'include_dirs': [
           '.',
           'src',
           '<(DEPTH)',
         ],
       },
 @ -24,53 +24,53 @@
         '<(DEPTH)/base/third_party/dynamic_annotations/dynamic_annotations.gyp:dynamic_annotations',
       ],
       'sources': [
         're2/bitstate.cc',
         're2/compile.cc',
         're2/dfa.cc',
         're2/filtered_re2.cc',
         're2/filtered_re2.h',
         're2/mimics_pcre.cc',
         're2/nfa.cc',
         're2/onepass.cc',
         're2/parse.cc',
         're2/perl_groups.cc',
         're2/prefilter.cc',
         're2/prefilter.h',
         're2/prefilter_tree.cc',
         're2/prefilter_tree.h',
         're2/prog.cc',
         're2/prog.h',
         're2/re2.cc',
         're2/re2.h',
         're2/regexp.cc',
         're2/regexp.h',
         're2/set.cc',
         're2/set.h',
         're2/simplify.cc',
         're2/stringpiece.cc',
         're2/stringpiece.h',
         're2/tostring.cc',
         're2/unicode_casefold.cc',
         're2/unicode_casefold.h',
         're2/unicode_groups.cc',
         're2/unicode_groups.h',
         're2/variadic_function.h',
         're2/walker-inl.h',
         'util/atomicops.h',
         'util/flags.h',
         'util/hash.cc',
         'util/logging.cc',
         'util/logging.h',
         'util/mutex.h',
         'util/rune.cc',
         'util/sparse_array.h',
         'util/sparse_set.h',
         'util/stringprintf.cc',
         'util/strutil.cc',
         'util/utf.h',
         'util/util.h',
         'util/valgrind.cc',
         'util/valgrind.h',
         'src/re2/bitstate.cc',
         'src/re2/compile.cc',
         'src/re2/dfa.cc',
         'src/re2/filtered_re2.cc',
         'src/re2/filtered_re2.h',
         'src/re2/mimics_pcre.cc',
         'src/re2/nfa.cc',
         'src/re2/onepass.cc',
         'src/re2/parse.cc',
         'src/re2/perl_groups.cc',
         'src/re2/prefilter.cc',
         'src/re2/prefilter.h',
         'src/re2/prefilter_tree.cc',
         'src/re2/prefilter_tree.h',
         'src/re2/prog.cc',
         'src/re2/prog.h',
         'src/re2/re2.cc',
         'src/re2/re2.h',
         'src/re2/regexp.cc',
         'src/re2/regexp.h',
         'src/re2/set.cc',
         'src/re2/set.h',
         'src/re2/simplify.cc',
         'src/re2/stringpiece.cc',
         'src/re2/stringpiece.h',
         'src/re2/tostring.cc',
         'src/re2/unicode_casefold.cc',
         'src/re2/unicode_casefold.h',
         'src/re2/unicode_groups.cc',
         'src/re2/unicode_groups.h',
         'src/re2/variadic_function.h',
         'src/re2/walker-inl.h',
         'src/util/atomicops.h',
         'src/util/flags.h',
         'src/util/hash.cc',
         'src/util/logging.cc',
         'src/util/logging.h',
         'src/util/mutex.h',
         'src/util/rune.cc',
         'src/util/sparse_array.h',
         'src/util/sparse_set.h',
         'src/util/stringprintf.cc',
         'src/util/strutil.cc',
         'src/util/utf.h',
         'src/util/util.h',
         'src/util/valgrind.cc',
         'src/util/valgrind.h',
       ],
       'conditions': [
         ['OS=="win"', {

									
										10

third_party/re2/re2.pc
									
										vendored
									
				@ -1,10 +0,0 @@

				prefix=@prefix@

				exec_prefix=${prefix}

				includedir=${prefix}/include

				libdir=${exec_prefix}/lib

				Name: re2

				Description: RE2 is a fast, safe, thread-friendly regular expression engine.

				Version: 0.0.0

				Cflags: -I${includedir}

				Libs: -L${libdir} -lre2 -pthread

									
										381

third_party/re2/re2/bitstate.cc
									
										vendored
									
				@ -1,381 +0,0 @@

				// Copyright 2008 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Tested by search_test.cc, exhaustive_test.cc, tester.cc

				// Prog::SearchBitState is a regular expression search with submatch

				// tracking for small regular expressions and texts.  Like

				// testing/backtrack.cc, it allocates a bit vector with (length of

				// text) * (length of prog) bits, to make sure it never explores the

				// same (character position, instruction) state multiple times.  This

				// limits the search to run in time linear in the length of the text.

				//

				// Unlike testing/backtrack.cc, SearchBitState is not recursive

				// on the text.

				//

				// SearchBitState is a fast replacement for the NFA code on small

				// regexps and texts when SearchOnePass cannot be used.

				#include "re2/prog.h"

				#include "re2/regexp.h"

				namespace re2 {

				// One entry on BitState's explicit backtracking stack.
				struct Job {

				  // Id of the program instruction to (re)visit.
				  int id;

				  // 0 for a first visit; 1 when resuming an Alt or Capture instruction
				  // that re-pushed itself (see BitState::TrySearch).
				  int arg;

				  // Text position to visit.  For a resumed Capture (arg == 1) this holds
				  // the capture register's previous value instead.
				  const char* p;

				};

				// Backtracking matcher that records visited (instruction, text position)
				// pairs in a bitmap so that no pair is explored twice.
				class BitState {

				 public:

				  explicit BitState(Prog* prog);

				  ~BitState();

				  // The usual Search prototype.

				  // Can only call Search once per BitState.

				  // (Search allocates visited_, cap_ and job_ without freeing earlier
				  // allocations; they are released only by the destructor.)
				  bool Search(const StringPiece& text, const StringPiece& context,

				              bool anchored, bool longest,

				              StringPiece* submatch, int nsubmatch);

				 private:

				  // Tests and sets the visited bit for (id, p); true on a first visit.
				  inline bool ShouldVisit(int id, const char* p);

				  // Pushes (id, p, arg) onto the job stack, growing it if necessary.
				  void Push(int id, const char* p, int arg);

				  // Doubles the job stack capacity; false if it is still too small.
				  bool GrowStack();

				  // Runs the search from instruction id at text position p.
				  bool TrySearch(int id, const char* p);

				  // Search parameters

				  Prog* prog_;              // program being run

				  StringPiece text_;        // text being searched

				  StringPiece context_;     // greater context of text being searched

				  bool anchored_;           // whether search is anchored at text.begin()

				  bool longest_;            // whether search wants leftmost-longest match

				  bool endmatch_;           // whether match must end at text.end()

				  StringPiece *submatch_;   // submatches to fill in

				  int nsubmatch_;           //   # of submatches to fill in

				  // Search state

				  const char** cap_;        // capture registers

				  int ncap_;                //   # of capture registers (at least 2)

				  // Number of bits held by each word of visited_.
				  static const int VisitedBits = 32;

				  uint32 *visited_;         // bitmap: (Inst*, char*) pairs already backtracked

				  int nvisited_;            //   # of words in bitmap

				  Job *job_;                // stack of text positions to explore

				  int njob_;                //   # of jobs currently on the stack

				  int maxjob_;              //   allocated capacity of job_

				};

				// Records the program to run and leaves every search parameter and
				// scratch buffer empty; the buffers are allocated later by Search().
				BitState::BitState(Prog* prog)

				  : prog_(prog),

				    anchored_(false),

				    longest_(false),

				    endmatch_(false),

				    submatch_(NULL),

				    nsubmatch_(0),

				    cap_(NULL),

				    ncap_(0),

				    visited_(NULL),

				    nvisited_(0),

				    job_(NULL),

				    njob_(0),

				    maxjob_(0) {

				}

				// Releases the scratch arrays allocated by Search().  All three pointers
				// start out NULL, so this is safe even if Search() was never called.
				BitState::~BitState() {

				  delete[] visited_;

				  delete[] job_;

				  delete[] cap_;

				}

				// Should the search visit the pair ip, p?

				// If so, remember that it was visited so that the next time,

				// we don't repeat the visit.

				bool BitState::ShouldVisit(int id, const char* p) {

				  size_t n = id * (text_.size() + 1) + (p - text_.begin());

				  if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1))))

				    return false;

				  visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1));

				  return true;

				}

				// Grow the stack.

				bool BitState::GrowStack() {

				  // VLOG(0) << "Reallocate.";

				  maxjob_ *= 2;

				  Job* newjob = new Job[maxjob_];

				  memmove(newjob, job_, njob_*sizeof job_[0]);

				  delete[] job_;

				  job_ = newjob;

				  if (njob_ >= maxjob_) {

				    LOG(DFATAL) << "Job stack overflow.";

				    return false;

				  }

				  return true;

				}

				// Push the triple (id, p, arg) onto the stack, growing it if necessary.

				void BitState::Push(int id, const char* p, int arg) {

				  if (njob_ >= maxjob_) {

				    if (!GrowStack())

				      return;

				  }

				  int op = prog_->inst(id)->opcode();

				  if (op == kInstFail)

				    return;

				  // Only check ShouldVisit when arg == 0.

				  // When arg > 0, we are continuing a previous visit.

				  if (arg == 0 && !ShouldVisit(id, p))

				    return;

				  Job* j = &job_[njob_++];

				  j->id = id;

				  j->p = p;

				  j->arg = arg;

				}

				// Try a search from instruction id0 in state p0.
				// Return whether it succeeded.
				//
				// The search is an iterative backtracker driven by the explicit job
				// stack.  When a match is found and submatches were requested, they are
				// written to submatch_; in longest-match mode the search keeps going and
				// only overwrites them with a match that ends later.
				bool BitState::TrySearch(int id0, const char* p0) {
				  bool matched = false;
				  const char* end = text_.end();
				  njob_ = 0;
				  Push(id0, p0, 0);
				  while (njob_ > 0) {
				    // Pop job off stack.
				    --njob_;
				    int id = job_[njob_].id;
				    const char* p = job_[njob_].p;
				    int arg = job_[njob_].arg;

				    // Optimization: rather than push and pop,
				    // code that is going to Push and continue
				    // the loop simply updates id, p, and arg
				    // and jumps to CheckAndLoop.  We have to
				    // do the ShouldVisit check that Push
				    // would have, but we avoid the stack
				    // manipulation.
				    if (0) {
				    CheckAndLoop:
				      if (!ShouldVisit(id, p))
				        continue;
				    }

				    // Visit id, p.
				    // VLOG(0) << "Job: " << id << " "
				    //         << (p - text_.begin()) << " " << arg;
				    Prog::Inst* ip = prog_->inst(id);
				    switch (ip->opcode()) {
				      case kInstFail:
				        // A fail instruction only kills the current path.  Keep
				        // processing the remaining jobs (as Push does when it silently
				        // drops fail instructions) rather than abandoning the whole
				        // search and any match recorded so far.
				        continue;

				      default:
				        LOG(DFATAL) << "Unexpected opcode: " << ip->opcode() << " arg " << arg;
				        return false;

				      case kInstAlt:
				        // Cannot just
				        //   Push(ip->out1(), p, 0);
				        //   Push(ip->out(), p, 0);
				        // If, during the processing of ip->out(), we encounter
				        // ip->out1() via another path, we want to process it then.
				        // Pushing it here will inhibit that.  Instead, re-push
				        // ip with arg==1 as a reminder to push ip->out1() later.
				        switch (arg) {
				          case 0:
				            Push(id, p, 1);  // come back when we're done
				            id = ip->out();
				            goto CheckAndLoop;

				          case 1:
				            // Finished ip->out(); try ip->out1().
				            arg = 0;
				            id = ip->out1();
				            goto CheckAndLoop;
				        }
				        LOG(DFATAL) << "Bad arg in kInstAlt: " << arg;
				        continue;

				      case kInstAltMatch:
				        // One opcode is byte range; the other leads to match.
				        if (ip->greedy(prog_)) {
				          // out1 is the match
				          Push(ip->out1(), p, 0);
				          id = ip->out1();
				          p = end;
				          goto CheckAndLoop;
				        }
				        // out is the match - non-greedy
				        Push(ip->out(), end, 0);
				        id = ip->out();
				        goto CheckAndLoop;

				      case kInstByteRange: {
				        // c stays -1 at the end of the text.
				        int c = -1;
				        if (p < end)
				          c = *p & 0xFF;
				        if (ip->Matches(c)) {
				          id = ip->out();
				          p++;
				          goto CheckAndLoop;
				        }
				        continue;
				      }

				      case kInstCapture:
				        switch (arg) {
				          case 0:
				            if (0 <= ip->cap() && ip->cap() < ncap_) {
				              // Capture p to register, but save old value.
				              Push(id, cap_[ip->cap()], 1);  // come back when we're done
				              cap_[ip->cap()] = p;
				            }
				            // Continue on.
				            id = ip->out();
				            goto CheckAndLoop;

				          case 1:
				            // Finished ip->out(); restore the old value.
				            cap_[ip->cap()] = p;
				            continue;
				        }
				        LOG(DFATAL) << "Bad arg in kInstCapture: " << arg;
				        continue;

				      case kInstEmptyWidth:
				        if (ip->empty() & ~Prog::EmptyFlags(context_, p))
				          continue;
				        id = ip->out();
				        goto CheckAndLoop;

				      case kInstNop:
				        id = ip->out();
				        goto CheckAndLoop;

				      case kInstMatch: {
				        if (endmatch_ && p != text_.end())
				          continue;

				        // VLOG(0) << "Found match.";
				        // We found a match.  If the caller doesn't care
				        // where the match is, no point going further.
				        if (nsubmatch_ == 0)
				          return true;

				        // Record best match so far.
				        // Only need to check end point, because this entire
				        // call is only considering one start position.
				        matched = true;
				        cap_[1] = p;
				        if (submatch_[0].data() == NULL ||
				            (longest_ && p > submatch_[0].end())) {
				          for (int i = 0; i < nsubmatch_; i++)
				            submatch_[i].set(cap_[2*i],
				                             static_cast<int>(cap_[2*i+1] - cap_[2*i]));
				        }

				        // If going for first match, we're done.
				        if (!longest_)
				          return true;

				        // If we used the entire text, no longer match is possible.
				        if (p == text_.end())
				          return true;

				        // Otherwise, continue on in hope of a longer match.
				        continue;
				      }
				    }
				  }
				  return matched;
				}

				// Searches text (within context) for prog_.
				// Stores the search parameters, allocates the visited bitmap, capture
				// registers and job stack, then runs TrySearch either once (anchored) or
				// from every start position in turn (unanchored).  Returns whether a
				// match was found; submatch[0..nsubmatch) is reset before searching.
				bool BitState::Search(const StringPiece& text, const StringPiece& context,
				                      bool anchored, bool longest,
				                      StringPiece* submatch, int nsubmatch) {
				  // Remember the search parameters; a context with no data defaults to
				  // the text itself.
				  text_ = text;
				  context_ = context;
				  if (context_.begin() == NULL)
				    context_ = text;

				  // An anchored program cannot match when the text does not reach the
				  // corresponding edge of the context.
				  if (prog_->anchor_start() && context_.begin() != text.begin())
				    return false;
				  if (prog_->anchor_end() && context_.end() != text.end())
				    return false;

				  anchored_ = anchored || prog_->anchor_start();
				  longest_ = longest || prog_->anchor_end();
				  endmatch_ = prog_->anchor_end();
				  submatch_ = submatch;
				  nsubmatch_ = nsubmatch;
				  for (int k = 0; k < nsubmatch_; k++)
				    submatch_[k] = NULL;

				  // Scratch space: one visited bit per (instruction, text position) pair,
				  // rounded up to whole words.
				  nvisited_ = (prog_->size() * (text.size()+1) + VisitedBits-1) / VisitedBits;
				  visited_ = new uint32[nvisited_];
				  memset(visited_, 0, nvisited_*sizeof visited_[0]);
				  // VLOG(0) << "nvisited_ = " << nvisited_;

				  // Two registers per submatch, and never fewer than two in total.
				  ncap_ = 2*nsubmatch;
				  if (ncap_ < 2)
				    ncap_ = 2;
				  cap_ = new const char*[ncap_];
				  memset(cap_, 0, ncap_*sizeof cap_[0]);

				  maxjob_ = 256;
				  job_ = new Job[maxjob_];

				  // Anchored search must start at text.begin().
				  if (anchored_) {
				    cap_[0] = text.begin();
				    return TrySearch(prog_->start(), text.begin());
				  }

				  // Unanchored search: try each start position, including the empty
				  // string at text.end() (hence <=, not <).  visited_ is not cleared
				  // between attempts, so no work is repeated and the total stays linear
				  // rather than quadratic in the text size.
				  const char* start = text.begin();
				  while (start <= text.end()) {
				    cap_[0] = start;
				    if (TrySearch(prog_->start(), start))  // Match must be leftmost; done.
				      return true;
				    start++;
				  }
				  return false;
				}

				// Bit-state search.

				bool Prog::SearchBitState(const StringPiece& text,

				                          const StringPiece& context,

				                          Anchor anchor,

				                          MatchKind kind,

				                          StringPiece* match,

				                          int nmatch) {

				  // If full match, we ask for an anchored longest match

				  // and then check that match[0] == text.

				  // So make sure match[0] exists.

				  StringPiece sp0;

				  if (kind == kFullMatch) {

				    anchor = kAnchored;

				    if (nmatch < 1) {

				      match = &sp0;

				      nmatch = 1;

				    }

				  }

				  // Run the search.

				  BitState b(this);

				  bool anchored = anchor == kAnchored;

				  bool longest = kind != kFirstMatch;

				  if (!b.Search(text, context, anchored, longest, match, nmatch))

				    return false;

				  if (kind == kFullMatch && match[0].end() != text.end())

				    return false;

				  return true;

				}

				}  // namespace re2

1151

third_party/re2/re2/compile.cc vendored

File diff suppressed because it is too large Load Diff

2112

third_party/re2/re2/dfa.cc vendored

File diff suppressed because it is too large Load Diff

									
										107

third_party/re2/re2/filtered_re2.cc
									
										vendored
									
				@ -1,107 +0,0 @@

				// Copyright 2009 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				#include <string>

				#include "util/util.h"

				#include "re2/filtered_re2.h"

				#include "re2/prefilter.h"

				#include "re2/prefilter_tree.h"

				namespace re2 {

				// Creates an empty, not-yet-compiled filter that owns a new PrefilterTree
				// (released by the destructor).
				FilteredRE2::FilteredRE2()

				    : compiled_(false),

				      prefilter_tree_(new PrefilterTree()) {

				}

				// Deletes every RE2 object added through Add(), then the prefilter tree.
				FilteredRE2::~FilteredRE2() {
				  for (vector<RE2*>::iterator it = re2_vec_.begin();
				       it != re2_vec_.end(); ++it) {
				    delete *it;
				  }
				  delete prefilter_tree_;
				}

				// Compiles pattern with options and, on success, takes ownership of the
				// resulting RE2, appends it to re2_vec_ and stores its index in *id.
				// On failure the RE2 is deleted, *id is left untouched, and (when
				// options.log_errors() is set) the failure is logged.
				// Returns the RE2's error code in both cases.
				RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
				                                const RE2::Options& options, int* id) {
				  RE2* re = new RE2(pattern, options);
				  RE2::ErrorCode code = re->error_code();

				  if (!re->ok()) {
				    if (options.log_errors()) {
				      // Log the pattern text itself; streaming `re` would only print
				      // the address of the RE2 object.
				      LOG(ERROR) << "Couldn't compile regular expression, skipping: "
				                 << pattern << " due to error " << re->error();
				    }
				    delete re;
				  } else {
				    *id = static_cast<int>(re2_vec_.size());
				    re2_vec_.push_back(re);
				  }

				  return code;
				}

				// Builds the prefilter tree from every regexp added so far and stores
				// the atoms to search for in *atoms.  When called again after a
				// successful compile, or before any regexp was added, it only logs the
				// current state and leaves *atoms untouched.
				void FilteredRE2::Compile(vector<string>* atoms) {
				  const bool nothing_to_do = compiled_ || re2_vec_.empty();
				  if (nothing_to_do) {
				    LOG(INFO) << "C: " << compiled_ << " S:" << re2_vec_.size();
				    return;
				  }

				  // Register one prefilter per regexp, in insertion order.
				  for (size_t idx = 0; idx < re2_vec_.size(); ++idx)
				    prefilter_tree_->Add(Prefilter::FromRE2(re2_vec_[idx]));

				  atoms->clear();
				  prefilter_tree_->Compile(atoms);
				  compiled_ = true;
				}

				// Tries each regexp in insertion order with RE2::PartialMatch, without
				// any prefiltering.  Returns the index of the first one that matches
				// text, or -1 if none does.
				int FilteredRE2::SlowFirstMatch(const StringPiece& text) const {
				  size_t idx = 0;
				  while (idx < re2_vec_.size()) {
				    if (RE2::PartialMatch(text, *re2_vec_[idx]))
				      return static_cast<int>(idx);
				    ++idx;
				  }
				  return -1;
				}

				int FilteredRE2::FirstMatch(const StringPiece& text,

				                            const vector<int>& atoms) const {

				  if (!compiled_) {

				    LOG(DFATAL) << "FirstMatch called before Compile";

				    return -1;

				  }

				  vector<int> regexps;

				  prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);

				  for (size_t i = 0; i < regexps.size(); i++)

				    if (RE2::PartialMatch(text, *re2_vec_[regexps[i]]))

				      return regexps[i];

				  return -1;

				}

				bool FilteredRE2::AllMatches(

				    const StringPiece& text,

				    const vector<int>& atoms,

				    vector<int>* matching_regexps) const {

				  matching_regexps->clear();

				  vector<int> regexps;

				  prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);

				  for (size_t i = 0; i < regexps.size(); i++)

				    if (RE2::PartialMatch(text, *re2_vec_[regexps[i]]))

				      matching_regexps->push_back(regexps[i]);

				  return !matching_regexps->empty();

				}

				// Fills *potential_regexps with the regexps that pass the prefilter for
				// the given matched atoms, by delegating to the prefilter tree.
				// NOTE(review): no clear() is done here; presumably
				// PrefilterTree::RegexpsGivenStrings resets the output vector — confirm.
				void FilteredRE2::AllPotentials(

				    const vector<int>& atoms,

				    vector<int>* potential_regexps) const {

				  prefilter_tree_->RegexpsGivenStrings(atoms, potential_regexps);

				}

				// Forwards directly to PrefilterTree::RegexpsGivenStrings with the same
				// arguments.
				void FilteredRE2::RegexpsGivenStrings(const vector<int>& matched_atoms,

				                                      vector<int>* passed_regexps) {

				  prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps);

				}

				// Forwards to PrefilterTree::PrintPrefilter for the regexp with the given
				// id.
				void FilteredRE2::PrintPrefilter(int regexpid) {

				  prefilter_tree_->PrintPrefilter(regexpid);

				}

				}  // namespace re2

									
										109

third_party/re2/re2/filtered_re2.h
									
										vendored
									
				@ -1,109 +0,0 @@

				// Copyright 2009 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps.

				// It provides a prefilter mechanism that helps in cutting down the

				// number of regexps that need to be actually searched.

				//

				// By design, it does not include a string matching engine. This is to

				// allow the user of the class to use their favorite string match

				// engine. The overall flow is: Add all the regexps using Add, then

				// Compile the FilteredRE2. The compile returns strings that need to

				// be matched. Note that all returned strings are lowercase. For

				// applying regexps to a search text, the caller does the string

				// matching using the strings returned. When doing the string match,

				// note that the caller has to do that on lower cased version of the

				// search text. Then call FirstMatch or AllMatches with a vector of

				// indices of strings that were found in the text to get the actual

				// regexp matches.

				#ifndef RE2_FILTERED_RE2_H_

				#define RE2_FILTERED_RE2_H_

				#include <vector>

				#include "re2/re2.h"

				namespace re2 {

				using std::vector;

				class PrefilterTree;

				// Holds a set of RE2 regexps together with a PrefilterTree used to narrow
				// down which of them need to be run against a text.  Owns both the RE2
				// objects and the tree.  Not copyable.
				class FilteredRE2 {

				 public:

				  FilteredRE2();

				  ~FilteredRE2();

				  // Uses RE2 constructor to create a RE2 object (re). Returns

				  // re->error_code(). If error_code is other than NoError, then re is

				  // deleted and not added to re2_vec_.

				  // On success *id receives the index of the new regexp; on failure *id
				  // is not written.
				  RE2::ErrorCode Add(const StringPiece& pattern,

				                     const RE2::Options& options,

				                     int *id);

				  // Prepares the regexps added by Add for filtering.  Returns a set

				  // of strings that the caller should check for in candidate texts.

				  // The returned strings are lowercased. When doing string matching,

				  // the search text should be lowercased first to find matching

				  // strings from the set of strings returned by Compile.  Call after

				  // all Add calls are done.

				  void Compile(vector<string>* strings_to_match);

				  // Returns the index of the first matching regexp.

				  // Returns -1 on no match. Can be called prior to Compile.

				  // Does not do any filtering: simply tries to Match the

				  // regexps in a loop.

				  int SlowFirstMatch(const StringPiece& text) const;

				  // Returns the index of the first matching regexp.

				  // Returns -1 on no match. Compile has to be called before

				  // calling this.

				  int FirstMatch(const StringPiece& text,

				                 const vector<int>& atoms) const;

				  // Returns the indices of all matching regexps, after first clearing

				  // matching_regexps.

				  // The return value is true if at least one regexp matched.
				  bool AllMatches(const StringPiece& text,

				                  const vector<int>& atoms,

				                  vector<int>* matching_regexps) const;

				  // Returns the indices of all potentially matching regexps after first

				  // clearing potential_regexps.

				  // A regexp is potentially matching if it passes the filter.

				  // If a regexp passes the filter it may still not match.

				  // A regexp that does not pass the filter is guaranteed to not match.

				  void AllPotentials(const vector<int>& atoms,

				                     vector<int>* potential_regexps) const;

				  // The number of regexps added.

				  int NumRegexps() const { return static_cast<int>(re2_vec_.size()); }

				 private:

				  // Get the individual RE2 objects. Useful for testing.

				  // regexpid is used as an unchecked index into re2_vec_.
				  RE2* GetRE2(int regexpid) const { return re2_vec_[regexpid]; }

				  // Print prefilter.

				  void PrintPrefilter(int regexpid);

				  // Useful for testing and debugging.

				  void RegexpsGivenStrings(const vector<int>& matched_atoms,

				                           vector<int>* passed_regexps);

				  // All the regexps in the FilteredRE2.

				  vector<RE2*> re2_vec_;

				  // Has the FilteredRE2 been compiled using Compile()

				  bool compiled_;

				  // An AND-OR tree of string atoms used for filtering regexps.

				  PrefilterTree* prefilter_tree_;

				  //DISALLOW_COPY_AND_ASSIGN(FilteredRE2);

				  // Declared private and left unimplemented to forbid copying.
				  FilteredRE2(const FilteredRE2&);

				  void operator=(const FilteredRE2&);

				};

				}  // namespace re2

				#endif  // RE2_FILTERED_RE2_H_

									
										116

third_party/re2/re2/make_perl_groups.pl
									
										vendored
									
				@ -1,116 +0,0 @@

				#!/usr/bin/perl

				# Copyright 2008 The RE2 Authors.  All Rights Reserved.

				# Use of this source code is governed by a BSD-style

				# license that can be found in the LICENSE file.

				# Generate table entries giving character ranges

				# for POSIX/Perl character classes.  Rather than

				# figure out what the definition is, it is easier to ask

				# Perl about each letter from 0-128 and write down

				# its answer.

				# POSIX bracket-class names.  Each one is handed to PrintClasses("posix", ...)
				# below, where ComputeClass wraps it in [...] to build the probing regexp.
				@posixclasses = (
					"[:alnum:]",
					"[:alpha:]",
					"[:ascii:]",
					"[:blank:]",
					"[:cntrl:]",
					"[:digit:]",
					"[:graph:]",
					"[:lower:]",
					"[:print:]",
					"[:punct:]",
					"[:space:]",
					"[:upper:]",
					"[:word:]",
					"[:xdigit:]",
				);
				# Perl escape classes, emitted by PrintClasses("perl", ...) below.
				@perlclasses = (
					"\\d",
					"\\s",
					"\\w",
				);
				# Per-character overrides consulted by ComputeClass before asking Perl.
				# Keys have the form "<class name>:<decimal code point>"; a defined value
				# replaces the result of the regexp test for that character.
				%overrides = (
					# Prior to Perl 5.18, \s did not match vertical tab.
					# RE2 preserves that original behaviour.
					"\\s:11" => 0,
				);

				# ComputeClass($cname)
				# Returns a list of [lo, hi] array refs covering the code points in 0..128
				# that belong to character class $cname.  Membership is taken from
				# %overrides when an entry "$cname:$code" is defined, otherwise from
				# matching chr($code) against /[$cname]/.
				sub ComputeClass($) {
				  my ($cname) = @_;
				  my $matcher = qr/[$cname]/;
				  my @ranges;
				  my $run_start = -1;
				  # 256 acts as a sentinel: it is never tested against the class, so it
				  # always takes the "not a member" path and closes a run still open
				  # after 128.
				  foreach my $code (0 .. 128, 256) {
				    my $member = $code <= 128
				        && ($overrides{"$cname:$code"} // chr($code) =~ $matcher);
				    if ($member) {
				      $run_start = $code if $run_start < 0;
				      next;
				    }
				    push @ranges, [$run_start, $code - 1] if $run_start >= 0;
				    $run_start = -1;
				  }
				  return @ranges;
				}

				# PrintClass($cnum, $cname, @ranges)
				# Prints a C array "code<cnum>" of URange16 entries, one per [lo, hi] pair
				# in @ranges, and returns two UGroup initializer strings that refer to it:
				# one for the class itself (sign +1) and one for its negation (sign -1).
				sub PrintClass($$@) {
				  my ($cnum, $cname, @ranges) = @_;
				  print "static const URange16 code${cnum}[] = {  /* $cname */\n";
				  foreach my $range (@ranges) {
				    printf "\t{ 0x%x, 0x%x },\n", $range->[0], $range->[1];
				  }
				  print "};\n";
				  my $n = @ranges;
				  # Double every backslash so the name survives inside a C string literal.
				  my $escname = $cname;
				  $escname =~ s/\\/\\\\/g;
				  # Declared with "my": the original assigned to an undeclared $negname,
				  # which silently created a package global.
				  my $negname = $escname;
				  if ($negname =~ /:/) {
				    # Name contains a colon: insert ^ after the first one ([:x:] -> [:^x:]).
				    $negname =~ s/:/:^/;
				  } else {
				    # Otherwise upper-case the letters (\d -> \D).
				    $negname =~ y/a-z/A-Z/;
				  }
				  return "{ \"$escname\", +1, code$cnum, $n }", "{ \"$negname\", -1, code$cnum, $n }";
				}

				# Running counter used to give each emitted range array a unique name.
				my $cnum = 0;
				# PrintClasses($pname, @classes)
				# Emits the range array for every class in @classes (via ComputeClass and
				# PrintClass), then the "<pname>_groups" UGroup table that references
				# them and a "num_<pname>_groups" constant holding the table length.
				sub PrintClasses($@) {
				  my ($pname, @classes) = @_;
				  my @entries;
				  for my $class (@classes) {
				    my @class_ranges = ComputeClass($class);
				    my @pair = PrintClass(++$cnum, $class, @class_ranges);
				    push @entries, @pair;
				  }
				  print "const UGroup ${pname}_groups[] = {\n";
				  print "\t$_,\n" for @entries;
				  print "};\n";
				  my $count = scalar(@entries);
				  print "const int num_${pname}_groups = $count;\n";
				}

				# Emit the generated-file banner, the include and the namespace opener.
				print <<EOF;

				// GENERATED BY make_perl_groups.pl; DO NOT EDIT.

				// make_perl_groups.pl >perl_groups.cc

				#include "re2/unicode_groups.h"

				namespace re2 {

				EOF

				# Emit the Perl escape classes first, then the POSIX classes; both share
				# the $cnum counter, so array names are numbered across the two tables.
				PrintClasses("perl", @perlclasses);

				PrintClasses("posix", @posixclasses);

				# Close the namespace opened by the banner above.
				print <<EOF;

				}  // namespace re2

				EOF

									
										147

third_party/re2/re2/make_unicode_casefold.py
									
										vendored
									
				@ -1,147 +0,0 @@

				#!/usr/bin/python

				# coding=utf-8

				#

				# Copyright 2008 The RE2 Authors.  All Rights Reserved.

				# Use of this source code is governed by a BSD-style

				# license that can be found in the LICENSE file.

				# See unicode_casefold.h for description of case folding tables.

				"""Generate C++ table for Unicode case folding."""

				import sys

				import unicode

				# Text printed by main() before the generated tables.
				_header = """

				// GENERATED BY make_unicode_casefold.py; DO NOT EDIT.

				// make_unicode_casefold.py >unicode_casefold.cc

				#include "re2/unicode_casefold.h"

				namespace re2 {

				"""

				# Text printed by main() after the generated tables.
				_trailer = """

				} // namespace re2

				"""

				def _Delta(a, b):

				  """Compute the delta for b - a.  Even/odd and odd/even

				     are handled specially, as described above."""

				  if a+1 == b:

				    if a%2 == 0:

				      return 'EvenOdd'

				    else:

				      return 'OddEven'

				  if a == b+1:

				    if a%2 == 0:

				      return 'OddEven'

				    else:

				      return 'EvenOdd'

				  return b - a

				def _AddDelta(a, delta):

				  """Return a + delta, handling EvenOdd and OddEven specially."""

				  if type(delta) == int:

				    return a+delta

				  if delta == 'EvenOdd':

				    if a%2 == 0:

				      return a+1

				    else:

				      return a-1

				  if delta == 'OddEven':

				    if a%2 == 1:

				      return a+1

				    else:

				      return a-1

				  print >>sys.stderr, "Bad Delta: ", delta

				  raise "Bad Delta"

				def _MakeRanges(pairs):
				  """Collapse (source, target) pairs into [lo, hi, delta] ranges.

				  For example [(65,97), (66,98), ..., (90,122)] becomes [[65, 90, 32]].
				  A run of every-other code point sharing an EvenOdd/OddEven delta is
				  folded into one range whose delta carries a 'Skip' suffix.
				  """
				  ranges = []
				  last = -100

				  def extends_run(prev, a, b, rng):
				    # a directly follows the previous source and maps by rng's delta.
				    if a == prev+1 and b == _AddDelta(a, rng[2]):
				      rng[1] = a
				      return True
				    return False

				  def extends_skip_run(prev, a, b, rng):
				    # a is two past the previous source and rng has a symbolic delta.
				    if a != prev+2:
				      return False
				    delta = rng[2]
				    if type(delta) is not str:
				      return False
				    if delta.endswith('Skip'):
				      base = delta[:-4]
				    else:
				      base = delta
				      delta = delta + 'Skip'
				    if b != _AddDelta(a, base):
				      return False
				    rng[1] = a
				    rng[2] = delta
				    return True

				  for a, b in pairs:
				    # Try to grow the most recent range first; otherwise start a new one.
				    merged = bool(ranges) and (extends_run(last, a, b, ranges[-1]) or
				                               extends_skip_run(last, a, b, ranges[-1]))
				    if not merged:
				      ranges.append([a, a, _Delta(a, b)])
				    last = a
				  return ranges

				# The maximum size of a case-folding group; main() raises an error for
				# any group longer than this.
				# Case folding is implemented in parse.cc by a recursive process
				# with a recursion depth equal to the size of the largest
				# case-folding group, so it is important that this bound be small.
				# The current tables have no group bigger than 4.
				# If there are ever groups bigger than 10 or so, it will be
				# time to rework the code in parse.cc.
				MaxCasefoldGroup = 4

				def main():
				  """Print the C++ source for the casefold and tolower tables to stdout.

				  Uses only print(...) with a single argument and dict.items(), which
				  behave the same under Python 2 and are also valid Python 3.

				  Raises:
				    unicode.Error: a case group is longer than MaxCasefoldGroup, or a
				      code point appears as a source in more than one fold pair.
				  """
				  lowergroups, casegroups = unicode.CaseGroups()
				  foldpairs = []
				  seen = {}
				  for c in casegroups:
				    if len(c) > MaxCasefoldGroup:
				      raise unicode.Error("casefold group too long: %s" % (c,))
				    for i in range(len(c)):
				      # c[i-1] wraps to the last element when i == 0, so the pairs chain
				      # the group into a cycle.
				      if c[i-1] in seen:
				        raise unicode.Error("bad casegroups %d -> %d" % (c[i-1], c[i]))
				      seen[c[i-1]] = True
				      foldpairs.append([c[i-1], c[i]])

				  lowerpairs = []
				  for lower, group in lowergroups.items():
				    for g in group:
				      if g != lower:
				        lowerpairs.append([g, lower])

				  def printpairs(name, foldpairs):
				    """Sort the pairs in place, compress them and print one table."""
				    foldpairs.sort()
				    foldranges = _MakeRanges(foldpairs)
				    print("// %d groups, %d pairs, %d ranges" % (len(casegroups), len(foldpairs), len(foldranges)))
				    print("const CaseFold unicode_%s[] = {" % (name,))
				    for lo, hi, delta in foldranges:
				      print("\t{ %d, %d, %s }," % (lo, hi, delta))
				    print("};")
				    print("const int num_unicode_%s = %d;" % (name, len(foldranges),))
				    print("")

				  print(_header)
				  printpairs("casefold", foldpairs)
				  printpairs("tolower", lowerpairs)
				  print(_trailer)

				if __name__ == '__main__':
				  main()

									
										111

third_party/re2/re2/make_unicode_groups.py
									
										vendored
									
				@ -1,111 +0,0 @@

				#!/usr/bin/python

				# Copyright 2008 The RE2 Authors.  All Rights Reserved.

				# Use of this source code is governed by a BSD-style

				# license that can be found in the LICENSE file.

				"""Generate C++ tables for Unicode Script and Category groups."""

				import sys

				import unicode

				# Text printed by main() before the generated tables.
				_header = """

				// GENERATED BY make_unicode_groups.py; DO NOT EDIT.

				// make_unicode_groups.py >unicode_groups.cc

				#include "re2/unicode_groups.h"

				namespace re2 {

				"""

				# Text printed by main() after the generated tables.
				_trailer = """

				}  // namespace re2

				"""

				# Running totals of 16-bit and 32-bit ranges, updated by PrintGroup and
				# reported by main().
				n16 = 0

				n32 = 0

				def MakeRanges(codes):
				  """Collapse codes into inclusive [lo, hi] runs of consecutive values.

				  For example [1,2,3,7,8,9] becomes [[1,3], [7,9]].  A value extends the
				  current run only when it is exactly one more than the value before it.
				  """
				  ranges = []
				  prev = -100
				  for code in codes:
				    if code != prev + 1:
				      ranges.append([code, code])
				    else:
				      ranges[-1][1] = code
				    prev = code
				  return ranges

				def PrintRanges(type, name, ranges):
				  """Print the ranges as a static const C array of the given type.

				  Args:
				    type: C element type name written into the declaration.
				    name: identifier of the emitted array.
				    ranges: iterable of (lo, hi) pairs, printed one initializer per line.

				  Uses single-argument print(...) so the function produces the same
				  output under Python 2 and is also valid Python 3.
				  """
				  print("static const %s %s[] = {" % (type, name,))
				  for lo, hi in ranges:
				    print("\t{ %d, %d }," % (lo, hi))
				  print("};")

				# def PrintCodes(type, name, codes):

				#   """Print the codes as an array of type named name."""

				#   print "static %s %s[] = {" % (type, name,)

				#   for c in codes:

				#     print "\t%d," % (c,)

				#   print "};"

				def PrintGroup(name, codes):
				  """Print the range arrays for one group and return its UGroup literal.

				  Codes below 65536 go into a URange16 array named <name>_range16 and the
				  rest into a URange32 array named <name>_range32; an empty half is not
				  printed and appears as "0, 0" in the literal.  The module counters n16
				  and n32 are increased by the number of ranges in each half.
				  """
				  # See unicode_groups.h for a description of the data structure.
				  global n16
				  global n32
				  low = MakeRanges([c for c in codes if c < 65536])
				  high = MakeRanges([c for c in codes if c >= 65536])
				  # Pulling singleton ranges out of the 16-bit half into a separate
				  # <name>_code16 array is intentionally disabled.
				  n16 += len(low)
				  n32 += len(high)

				  fields = ["\"%s\"" % (name,), "+1"]
				  halves = (("URange16", name+"_range16", low),
				            ("URange32", name+"_range32", high))
				  for ctype, array_name, ranges in halves:
				    if ranges:
				      PrintRanges(ctype, array_name, ranges)
				      fields.append(array_name)
				      fields.append("%d" % (len(ranges),))
				    else:
				      fields.append("0")
				      fields.append("0")
				  return "{ " + ", ".join(fields) + " }"

				def main():
				  """Print the C++ source for the Unicode category and script tables.

				  Emits one set of range arrays per group returned by
				  unicode.Categories() and unicode.Scripts(), then the sorted
				  unicode_groups table and its length.  Uses single-argument print(...)
				  and dict.items(), which behave the same under Python 2 and are also
				  valid Python 3.
				  """
				  print(_header)
				  ugroups = []
				  for name, codes in unicode.Categories().items():
				    ugroups.append(PrintGroup(name, codes))
				  for name, codes in unicode.Scripts().items():
				    ugroups.append(PrintGroup(name, codes))
				  print("// %d 16-bit ranges, %d 32-bit ranges" % (n16, n32))
				  print("const UGroup unicode_groups[] = {")
				  # Sort the literals so the table order does not depend on dict order.
				  ugroups.sort()
				  for ug in ugroups:
				    print("\t%s," % (ug,))
				  print("};")
				  print("const int num_unicode_groups = %d;" % (len(ugroups),))
				  print(_trailer)

				if __name__ == '__main__':
				  main()

									
										185

third_party/re2/re2/mimics_pcre.cc
									
										vendored
									
				@ -1,185 +0,0 @@

				// Copyright 2008 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Determine whether this library should match PCRE exactly

				// for a particular Regexp.  (If so, the testing framework can

				// check that it does.)

				//

				// This library matches PCRE except in these cases:

				//   * the regexp contains a repetition of an empty string,

				//     like (a*)* or (a*)+.  In this case, PCRE will treat

				//     the repetition sequence as ending with an empty string,

				//     while this library does not.

				//   * Perl and PCRE differ on whether \v matches \n.

				//     For historical reasons, this library implements the Perl behavior.

				//   * Perl and PCRE allow $ in one-line mode to match either the very

				//     end of the text or just before a \n at the end of the text.

				//     This library requires it to match only the end of the text.

				//   * Similarly, Perl and PCRE do not allow ^ in multi-line mode to

				//     match the end of the text if the last character is a \n.

				//     This library does allow it.

				//

				// Regexp::MimicsPCRE checks for any of these conditions.

				#include "util/util.h"

				#include "re2/regexp.h"

				#include "re2/walker-inl.h"

				namespace re2 {

				// Returns whether re might match an empty string.

				static bool CanBeEmptyString(Regexp *re);

				// Walker class to compute whether library handles a regexp
				// exactly as PCRE would.  See comment at top for conditions.
				class PCREWalker : public Regexp::Walker<bool> {
				 public:
				  PCREWalker() {}

				  // Called after re's children have been visited, with their results in
				  // child_args.  Returns false as soon as any reason to differ from PCRE
				  // is found.  Defined below.
				  bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, bool* child_args,
				                 int nchild_args);

				  // Logs under DFATAL and returns its argument unchanged.
				  bool ShortVisit(Regexp* re, bool a) {
				    // Should never be called: we use Walk not WalkExponential.
				    // The message names this class; it previously said EmptyStringWalker,
				    // which pointed readers of the log at the wrong walker.
				    LOG(DFATAL) << "PCREWalker::ShortVisit called";
				    return a;
				  }
				};

				// Runs once all of re's children have been visited; child_args[i] says
				// whether this library mimics PCRE for the i-th subexpression.  Returns
				// whether it also does so for re as a whole.
				bool PCREWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
				                           bool* child_args, int nchild_args) {
				  // One non-conforming child is enough to fail the parent.
				  for (int k = 0; k < nchild_args; k++) {
				    if (!child_args[k])
				      return false;
				  }

				  // The children are fine; check this node itself.
				  bool mimics = true;
				  switch (re->op()) {
				    // Repetition of something that can be the empty string.
				    case kRegexpStar:
				    case kRegexpPlus:
				    case kRegexpQuest:
				      mimics = !CanBeEmptyString(re->sub()[0]);
				      break;

				    // Only a repeat with max() == -1 is checked for an empty operand.
				    case kRegexpRepeat:
				      mimics = !(re->max() == -1 && CanBeEmptyString(re->sub()[0]));
				      break;

				    // A literal \v.
				    case kRegexpLiteral:
				      mimics = (re->rune() != '\v');
				      break;

				    // A $ in single-line mode, identified by the WasDollar parse flag.
				    case kRegexpEndText:
				    case kRegexpEmptyMatch:
				      mimics = !(re->parse_flags() & Regexp::WasDollar);
				      break;

				    // A ^ in multi-line mode.  No further condition is needed:
				    // in single-line mode ^ becomes kRegexpBeginText.
				    case kRegexpBeginLine:
				      mimics = false;
				      break;

				    default:
				      break;
				  }
				  return mimics;
				}

				// Returns whether this regexp's behavior will mimic PCRE's exactly.

				bool Regexp::MimicsPCRE() {

				  PCREWalker w;

				  return w.Walk(this, true);

				}

				// Walker that decides whether a Regexp can match the empty string.
				// Overestimating is acceptable.  For instance \b\B can never match an
				// empty string, since \b and \B exclude each other, but this walker is
				// not that clever and will answer yes.  A spurious yes only reduces the
				// number of regexps that get sanity checked against PCRE; it does not
				// break anything.
				class EmptyStringWalker : public Regexp::Walker<bool> {
				 public:
				  EmptyStringWalker() {}

				  // Combines the children's answers into the answer for re.  Defined below.
				  bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, bool* child_args,
				                 int nchild_args);

				  // Logs under DFATAL and returns its argument unchanged.
				  bool ShortVisit(Regexp* re, bool arg) {
				    // Not expected to run: callers use Walk, not WalkExponential.
				    LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
				    return arg;
				  }

				 private:
				  DISALLOW_COPY_AND_ASSIGN(EmptyStringWalker);
				};

				// Runs once re's children have been visited.  child_args[i] is the
				// PostVisit result for the i-th child, i.e. whether that child can match
				// an empty string.  Returns the same answer for re itself; operators not
				// listed below yield false.
				bool EmptyStringWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
				                                  bool* child_args, int nchild_args) {
				  bool can_be_empty = false;
				  switch (re->op()) {
				    // Never empty.
				    case kRegexpNoMatch:
				    case kRegexpLiteral:
				    case kRegexpAnyChar:
				    case kRegexpAnyByte:
				    case kRegexpCharClass:
				    case kRegexpLiteralString:
				      can_be_empty = false;
				      break;

				    // Always empty (the assertions: whenever they match), or able to
				    // match zero repetitions.
				    case kRegexpEmptyMatch:
				    case kRegexpBeginLine:
				    case kRegexpEndLine:
				    case kRegexpNoWordBoundary:
				    case kRegexpWordBoundary:
				    case kRegexpBeginText:
				    case kRegexpEndText:
				    case kRegexpStar:
				    case kRegexpQuest:
				    case kRegexpHaveMatch:
				      can_be_empty = true;
				      break;

				    // A concatenation is empty only when every piece can be.
				    case kRegexpConcat: {
				      can_be_empty = true;
				      for (int i = 0; i < nchild_args && can_be_empty; i++)
				        can_be_empty = child_args[i];
				      break;
				    }

				    // An alternation is empty when at least one branch can be.
				    case kRegexpAlternate: {
				      for (int i = 0; i < nchild_args && !can_be_empty; i++)
				        can_be_empty = child_args[i];
				      break;
				    }

				    // These simply pass on the child's answer.
				    case kRegexpPlus:
				    case kRegexpCapture:
				      can_be_empty = child_args[0];
				      break;

				    // Empty if the child can be, or if the minimum count is zero.
				    case kRegexpRepeat:
				      can_be_empty = child_args[0] || re->min() == 0;
				      break;
				  }
				  return can_be_empty;
				}

				// Reports whether re can match an empty string, as judged by an
				// EmptyStringWalker (which may overestimate).
				static bool CanBeEmptyString(Regexp* re) {
				  EmptyStringWalker walker;
				  const bool empty_ok = walker.Walk(re, true);
				  return empty_ok;
				}

				}  // namespace re2

									
										758

third_party/re2/re2/nfa.cc
									
										vendored
									
				@ -1,758 +0,0 @@

				// Copyright 2006-2007 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Tested by search_test.cc.

				//

				// Prog::SearchNFA, an NFA search.

				// This is an actual NFA like the theorists talk about,

				// not the pseudo-NFA found in backtracking regexp implementations.

				//

				// IMPLEMENTATION

				//

				// This algorithm is a variant of one that appeared in Rob Pike's sam editor,

				// which is a variant of the one described in Thompson's 1968 CACM paper.

				// See http://swtch.com/~rsc/regexp/ for various history.  The main feature

				// over the DFA implementation is that it tracks submatch boundaries.

				//

				// When the choice of submatch boundaries is ambiguous, this particular

				// implementation makes the same choices that traditional backtracking

				// implementations (in particular, Perl and PCRE) do.

				// Note that unlike in Perl and PCRE, this algorithm *cannot* take exponential

				// time in the length of the input.

				//

				// Like Thompson's original machine and like the DFA implementation, this

				// implementation notices a match only once it is one byte past it.

				#include "re2/prog.h"

				#include "re2/regexp.h"

				#include "util/sparse_array.h"

				#include "util/sparse_set.h"

				namespace re2 {

				// Holds the state of one NFA search over a Prog: the two thread queues,
				// the best match found so far, and a free list of recycled threads.
				class NFA {
				 public:
				  NFA(Prog* prog);
				  ~NFA();

				  // Searches for a matching string.
				  //   * If anchored is true, only considers matches starting at offset.
				  //     Otherwise finds leftmost match at or after offset.
				  //   * If longest is true, returns the longest match starting
				  //     at the chosen start point.  Otherwise returns the so-called
				  //     left-biased match, the one traditional backtracking engines
				  //     (like Perl and PCRE) find.
				  // Records submatch boundaries in submatch[1..nsubmatch-1].
				  // Submatch[0] is the entire match.  When there is a choice in
				  // which text matches each subexpression, the submatch boundaries
				  // are chosen to match what a backtracking implementation would choose.
				  bool Search(const StringPiece& text, const StringPiece& context,
				              bool anchored, bool longest,
				              StringPiece* submatch, int nsubmatch);

				  // When nonzero, AddToThreadq and Search print trace output to stderr.
				  static const int Debug = 0;

				 private:
				  // One thread of execution: the instruction it is parked on plus its
				  // own copy of the capture pointers.
				  struct Thread {
				    union {
				      int id;
				      Thread* next;  // when on free list
				    };
				    const char** capture;
				  };

				  // State for explicit stack in AddToThreadq.
				  struct AddState {
				    int id;           // Inst to process
				    int j;
				    const char* cap_j;  // if j>=0, set capture[j] = cap_j before processing ip

				    AddState()
				      : id(0), j(-1), cap_j(NULL) {}
				    explicit AddState(int id)
				      : id(id), j(-1), cap_j(NULL) {}
				    AddState(int id, const char* cap_j, int j)
				      : id(id), j(j), cap_j(cap_j) {}
				  };

				  // Threadq is a list of threads.  The list is sorted by the order
				  // in which Perl would explore that particular state -- the earlier
				  // choices appear earlier in the list.
				  typedef SparseArray<Thread*> Threadq;

				  // Take a thread from the free list, or allocate a new one with room for
				  // ncapture_ capture pointers when the list is empty.
				  inline Thread* AllocThread();
				  // Put a thread back on the free list; a NULL argument is ignored.
				  inline void FreeThread(Thread*);

				  // Add id (or its children, following unlabeled arrows)
				  // to the workqueue q with associated capture info.
				  void AddToThreadq(Threadq* q, int id, int flag,
				                    const char* p, const char** capture);

				  // Run runq on byte c, appending new states to nextq.
				  // Updates matched_ and match_ as new, better matches are found.
				  // p is position of the next byte (the one after c)
				  // in the input string, used when processing capturing parens.
				  // NOTE(review): the comment on the definition of Step says p is the
				  // position of c itself -- confirm which one is right.
				  // flag is the bitwise or of Bol, Eol, etc., specifying whether
				  // ^, $ and \b match the current input point (after c).
				  inline int Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p);

				  // Returns text version of capture information, for debugging.
				  string FormatCapture(const char** capture);

				  // Copies ncapture_ capture pointers from src to dst.
				  inline void CopyCapture(const char** dst, const char** src);

				  // Computes whether all matches must begin with the same first
				  // byte, and if so, returns that byte.  If not, returns -1.
				  int ComputeFirstByte();

				  Prog* prog_;          // underlying program
				  int start_;           // start instruction in program
				  int ncapture_;        // number of submatches to track
				  bool longest_;        // whether searching for longest match
				  bool endmatch_;       // whether match must end at text.end()
				  const char* btext_;   // beginning of text being matched (for FormatCapture)
				  const char* etext_;   // end of text being matched (for endmatch_)
				  Threadq q0_, q1_;     // pre-allocated for Search.
				  const char** match_;  // best match so far
				  bool matched_;        // any match so far?
				  AddState* astack_;    // pre-allocated for AddToThreadq
				  int nastack_;         // number of entries in astack_
				  int first_byte_;      // required first byte for match, or -1 if none

				  Thread* free_threads_;  // free list

				  DISALLOW_COPY_AND_ASSIGN(NFA);
				};

				// Prepares an NFA for prog: resets the per-search state, sizes both
				// thread queues to the program size, allocates the AddToThreadq stack
				// with two entries per instruction, and computes the required first byte.
				NFA::NFA(Prog* prog)
				    : prog_(prog),
				      start_(prog->start()),
				      ncapture_(0),
				      longest_(false),
				      endmatch_(false),
				      btext_(NULL),
				      etext_(NULL),
				      match_(NULL),
				      matched_(false),
				      free_threads_(NULL) {
				  q0_.resize(prog_->size());
				  q1_.resize(prog_->size());
				  // Set in the body rather than the initializer list: astack_ is declared
				  // before nastack_ but its size depends on it.
				  nastack_ = 2*prog_->size();
				  astack_ = new AddState[nastack_];
				  first_byte_ = ComputeFirstByte();
				}

				// Releases the match buffer, the AddToThreadq stack and every thread
				// on the free list together with its capture array.
				NFA::~NFA() {
				  delete[] match_;
				  delete[] astack_;
				  Thread* t = free_threads_;
				  while (t != NULL) {
				    // Read the link before the node is destroyed.
				    Thread* following = t->next;
				    delete[] t->capture;
				    delete t;
				    t = following;
				  }
				}

				// Pushes t onto the front of the free list.  A NULL t is ignored.
				void NFA::FreeThread(Thread *t) {
				  if (t != NULL) {
				    t->next = free_threads_;
				    free_threads_ = t;
				  }
				}

				// Returns a thread for reuse: the head of the free list when there is
				// one, otherwise a newly allocated thread whose capture array holds
				// ncapture_ pointers.  The capture contents are not initialized here.
				NFA::Thread* NFA::AllocThread() {
				  Thread* t = free_threads_;
				  if (t != NULL) {
				    free_threads_ = t->next;
				    return t;
				  }
				  t = new Thread;
				  t->capture = new const char*[ncapture_];
				  return t;
				}

				// Copies the capture pointers from src to dst one (begin, end) pair at a
				// time, for indices below ncapture_.
				void NFA::CopyCapture(const char** dst, const char** src) {
				  int i = 0;
				  while (i < ncapture_) {
				    dst[i] = src[i];
				    dst[i+1] = src[i+1];
				    i += 2;
				  }
				}

				// Follows all empty arrows from id0 and enqueues all the states reached.
				// The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match.
				// The pointer p is the current input position, and capture is the
				// current set of match boundaries.  capture is modified while the
				// traversal runs; each modified slot is put back by a restore entry
				// pushed on the stack before the slot is changed.
				void NFA::AddToThreadq(Threadq* q, int id0, int flag,
				                       const char* p, const char** capture) {
				  // Instruction 0 is treated as "nothing to add".
				  if (id0 == 0)
				    return;

				  // Astack_ is pre-allocated to avoid resize operations.
				  // It has room for 2*prog_->size() entries, which is enough:
				  // Each inst in prog can be processed at most once,
				  // pushing at most two entries on stk.

				  int nstk = 0;
				  AddState* stk = astack_;
				  stk[nstk++] = AddState(id0);

				  while (nstk > 0) {
				    DCHECK_LE(nstk, nastack_);
				    const AddState& a = stk[--nstk];
				    // An entry with j >= 0 carries a capture value to put back.
				    if (a.j >= 0)
				      capture[a.j] = a.cap_j;

				    // Copy the id out now: a refers to a stack slot that the pushes
				    // below may overwrite.
				    int id = a.id;
				    // id 0 marks a restore-only entry (or a missing target).
				    if (id == 0)
				      continue;
				    // Already queued in this pass: the earlier visit wins.
				    if (q->has_index(id)) {
				      if (Debug)
				        fprintf(stderr, "  [%d%s]\n", id, FormatCapture(capture).c_str());
				      continue;
				    }

				    // Create entry in q no matter what.  We might fill it in below,
				    // or we might not.  Even if not, it is necessary to have it,
				    // so that we don't revisit id0 during the recursion.
				    q->set_new(id, NULL);

				    Thread** tp = &q->find(id)->second;
				    int j;
				    Thread* t;
				    Prog::Inst* ip = prog_->inst(id);
				    switch (ip->opcode()) {
				    default:
				      LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq";
				      break;

				    case kInstFail:
				      break;

				    case kInstAltMatch:
				      // Save state; will pick up at next byte.
				      t = AllocThread();
				      t->id = id;
				      CopyCapture(t->capture, capture);
				      *tp = t;
				      // fall through

				    case kInstAlt:
				      // Explore alternatives.
				      // out1 is pushed first so that out is popped, and therefore
				      // explored, first.
				      stk[nstk++] = AddState(ip->out1());
				      stk[nstk++] = AddState(ip->out());
				      break;

				    case kInstNop:
				      // Continue on.
				      stk[nstk++] = AddState(ip->out());
				      break;

				    case kInstCapture:
				      // Capture slots at or beyond ncapture_ are not tracked.
				      if ((j=ip->cap()) < ncapture_) {
				        // Push a dummy whose only job is to restore capture[j]
				        // once we finish exploring this possibility.
				        stk[nstk++] = AddState(0, capture[j], j);

				        // Record capture.
				        capture[j] = p;
				      }
				      stk[nstk++] = AddState(ip->out());
				      break;

				    case kInstMatch:
				    case kInstByteRange:
				      // Save state; will pick up at next byte.
				      t = AllocThread();
				      t->id = id;
				      CopyCapture(t->capture, capture);
				      *tp = t;
				      if (Debug)
				        fprintf(stderr, " + %d%s [%p]\n", id, FormatCapture(t->capture).c_str(), t);
				      break;

				    case kInstEmptyWidth:
				      // Continue on if we have all the right flag bits.
				      if (ip->empty() & ~flag)
				        break;
				      stk[nstk++] = AddState(ip->out());
				      break;
				    }
				  }
				}

				// Run runq on byte c, appending new states to nextq.
				// Updates match as new, better matches are found.
				// p is position of the byte c in the input string,
				// used when processing capturing parens.
				// flag is the bitwise or of Bol, Eol, etc., specifying whether
				// ^, $ and \b match the current input point (after c).
				// Frees all the threads on runq.
				// If there is a shortcut to the end, returns that shortcut.
				// Otherwise returns 0.
				int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) {
				  nextq->clear();

				  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
				    Thread* t = i->second;
				    // Queue entries without a thread are placeholders; skip them.
				    if (t == NULL)
				      continue;

				    if (longest_) {
				      // Can skip any threads started after our current best match.
				      if (matched_ && match_[0] < t->capture[0]) {
				        FreeThread(t);
				        continue;
				      }
				    }

				    int id = t->id;
				    Prog::Inst* ip = prog_->inst(id);

				    switch (ip->opcode()) {
				      default:
				        // Should only see the values handled below.
				        LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step";
				        break;

				      case kInstByteRange:
				        // The thread survives only if c is in range; its successors are
				        // queued for the position after c.
				        if (ip->Matches(c))
				          AddToThreadq(nextq, ip->out(), flag, p+1, t->capture);
				        break;

				      case kInstAltMatch:
				        // Only acted on when it is the first entry of runq.
				        if (i != runq->begin())
				          break;
				        // The match is ours if we want it.
				        if (ip->greedy(prog_) || longest_) {
				          CopyCapture((const char**)match_, t->capture);
				          FreeThread(t);
				          // Release every remaining thread before abandoning the queue.
				          for (++i; i != runq->end(); ++i)
				            FreeThread(i->second);
				          runq->clear();
				          matched_ = true;
				          if (ip->greedy(prog_))
				            return ip->out1();
				          return ip->out();
				        }
				        break;

				      case kInstMatch:
				        // When the match must end at etext_, ignore it anywhere else.
				        if (endmatch_ && p != etext_)
				          break;

				        const char* old = t->capture[1];  // previous end pointer
				        t->capture[1] = p;
				        if (longest_) {
				          // Leftmost-longest mode: save this match only if
				          // it is either farther to the left or at the same
				          // point but longer than an existing match.
				          if (!matched_ || t->capture[0] < match_[0] ||
				              (t->capture[0] == match_[0] && t->capture[1] > match_[1]))
				            CopyCapture((const char**)match_, t->capture);
				        } else {
				          // Leftmost-biased mode: this match is by definition
				          // better than what we've already found (see next line).
				          CopyCapture((const char**)match_, t->capture);

				          // Cut off the threads that can only find matches
				          // worse than the one we just found: don't run the
				          // rest of the current Threadq.
				          // NOTE(review): old was read from capture[1] but is written
				          // back to capture[0] here and below -- looks like the wrong
				          // index.  The thread is freed right after, so confirm
				          // whether the restore matters at all.
				          t->capture[0] = old;
				          FreeThread(t);
				          for (++i; i != runq->end(); ++i)
				            FreeThread(i->second);
				          runq->clear();
				          matched_ = true;
				          return 0;
				        }
				        t->capture[0] = old;
				        matched_ = true;
				        break;
				    }
				    // Every thread that reaches this point is recycled.
				    FreeThread(t);
				  }
				  runq->clear();
				  return 0;
				}

				// Renders the capture pairs as text: "(b,e)" with offsets relative to
				// btext_, using "?" for a pointer that is NULL.  An end pointer is only
				// looked at when the begin pointer is set.
				string NFA::FormatCapture(const char** capture) {
				  string out;
				  for (int i = 0; i < ncapture_; i += 2) {
				    const char* begin = capture[i];
				    if (begin == NULL) {
				      StringAppendF(&out, "(?,?)");
				      continue;
				    }
				    const char* end = capture[i+1];
				    if (end == NULL) {
				      StringAppendF(&out, "(%d,?)", static_cast<int>(begin - btext_));
				      continue;
				    }
				    StringAppendF(&out, "(%d,%d)",
				                  static_cast<int>(begin - btext_),
				                  static_cast<int>(end - btext_));
				  }
				  return out;
				}

				// Reports whether the memory spanned by needle lies entirely within the
				// memory spanned by haystack (both ends inclusive of equality).
				static bool StringPieceContains(const StringPiece haystack, const StringPiece needle) {
				  if (needle.begin() < haystack.begin())
				    return false;
				  return needle.end() <= haystack.end();
				}

				// Runs the NFA over text and reports whether a match was found.

				//

				//   text          - the bytes to search.

				//   const_context - surrounding text used for empty-width checks

				//                   (^, $, \b, ...); if its begin() is NULL, text

				//                   itself is used as the context.

				//   anchored      - if true, only a match starting at text.begin()

				//                   is considered (also forced on by

				//                   prog_->anchor_start()).

				//   longest       - passed through to longest_; forced to true when

				//                   prog_->anchor_end() is set.

				//   submatch      - on success, submatch[i] for i < nsubmatch is set

				//                   from match_[2*i] and match_[2*i+1].

				//   nsubmatch     - number of entries in submatch; must be >= 0.

				//

				// Returns false without searching if start_ is 0, if context does

				// not contain text, if an anchor requirement of the program cannot

				// be met by text/context, or if nsubmatch is negative.

				bool NFA::Search(const StringPiece& text, const StringPiece& const_context,

				            bool anchored, bool longest,

				            StringPiece* submatch, int nsubmatch) {

				  if (start_ == 0)

				    return false;

				  StringPiece context = const_context;

				  if (context.begin() == NULL)

				    context = text;

				  // NOTE(review): this check uses LOG(FATAL) while the nsubmatch

				  // check below uses LOG(DFATAL) -- confirm the asymmetry is intended.

				  if (!StringPieceContains(context, text)) {

				    LOG(FATAL) << "Bad args: context does not contain text "

				                << reinterpret_cast<const void*>(context.begin())

				                << "+" << context.size() << " "

				                << reinterpret_cast<const void*>(text.begin())

				                << "+" << text.size();

				    return false;

				  }

				  // A program anchored at the start (end) can only match if text

				  // starts (ends) exactly where context does.

				  if (prog_->anchor_start() && context.begin() != text.begin())

				    return false;

				  if (prog_->anchor_end() && context.end() != text.end())

				    return false;

				  anchored |= prog_->anchor_start();

				  if (prog_->anchor_end()) {

				    longest = true;

				    endmatch_ = true;

				    etext_ = text.end();

				  }

				  if (nsubmatch < 0) {

				    LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch;

				    return false;

				  }

				  // Save search parameters.

				  ncapture_ = 2*nsubmatch;

				  longest_ = longest;

				  if (nsubmatch == 0) {

				    // We need to maintain match[0], both to distinguish the

				    // longest match (if longest is true) and also to tell

				    // whether we've seen any matches at all.

				    ncapture_ = 2;

				  }

				  // NOTE(review): match_ is allocated here and not released in this

				  // function; presumably the owner of match_ frees it elsewhere --

				  // confirm, especially if Search can run twice on one NFA.

				  match_ = new const char*[ncapture_];

				  matched_ = false;

				  memset(match_, 0, ncapture_*sizeof match_[0]);

				  // For debugging prints.

				  btext_ = context.begin();

				  if (Debug) {

				    fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",

				            text.as_string().c_str(), context.as_string().c_str(), anchored,

				            longest);

				  }

				  // Set up search.

				  Threadq* runq = &q0_;

				  Threadq* nextq = &q1_;

				  runq->clear();

				  nextq->clear();

				  // match_ was already zeroed right after allocation above; this

				  // second clear repeats that work.

				  memset(&match_[0], 0, ncapture_*sizeof match_[0]);

				  const char* bp = context.begin();

				  // c is the byte handed to Step on the next iteration; -1 means

				  // there is no previous byte.  wasword tracks whether that previous

				  // byte was a word character, for \b and \B.

				  int c = -1;

				  int wasword = 0;

				  if (text.begin() > context.begin()) {

				    c = text.begin()[-1] & 0xFF;

				    wasword = Prog::IsWordChar(static_cast<uint8>(c));

				  }

				  // Loop over the text, stepping the machine.

				  // The loop has no condition: it runs one position past text.end()

				  // and exits through the breaks below.

				  for (const char* p = text.begin();; p++) {

				    // Check for empty-width specials.

				    int flag = 0;

				    // ^ and \A

				    if (p == context.begin())

				      flag |= kEmptyBeginText | kEmptyBeginLine;

				    else if (p <= context.end() && p[-1] == '\n')

				      flag |= kEmptyBeginLine;

				    // $ and \z

				    if (p == context.end())

				      flag |= kEmptyEndText | kEmptyEndLine;

				    else if (p < context.end() && p[0] == '\n')

				      flag |= kEmptyEndLine;

				    // \b and \B

				    int isword = 0;

				    if (p < context.end())

				      isword = Prog::IsWordChar(p[0] & 0xFF);

				    if (isword != wasword)

				      flag |= kEmptyWordBoundary;

				    else

				      flag |= kEmptyNonWordBoundary;

				    if (Debug) {

				      fprintf(stderr, "%c[%#x/%d/%d]:", p > text.end() ? '$' : p == bp ? '^' : c, flag, isword, wasword);

				      for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {

				        Thread* t = i->second;

				        if (t == NULL)

				          continue;

				        fprintf(stderr, " %d%s", t->id,

				                FormatCapture((const char**)t->capture).c_str());

				      }

				      fprintf(stderr, "\n");

				    }

				    // Process previous character (waited until now to avoid

				    // repeating the flag computation above).

				    // This is a no-op the first time around the loop, because

				    // runq is empty.

				    int id = Step(runq, nextq, c, flag, p-1);

				    DCHECK_EQ(runq->size(), 0);

				    swap(nextq, runq);

				    nextq->clear();

				    if (id != 0) {

				      // We're done: full match ahead.

				      // Follow the instruction chain starting at id, recording

				      // captures at text.end(), until kInstMatch is reached.

				      p = text.end();

				      for (;;) {

				        Prog::Inst* ip = prog_->inst(id);

				        switch (ip->opcode()) {

				          default:

				            LOG(DFATAL) << "Unexpected opcode in short circuit: " << ip->opcode();

				            break;

				          case kInstCapture:

				            if (ip->cap() < ncapture_)

				              match_[ip->cap()] = p;

				            id = ip->out();

				            continue;

				          case kInstNop:

				            id = ip->out();

				            continue;

				          case kInstMatch:

				            match_[1] = p;

				            matched_ = true;

				            break;

				          case kInstEmptyWidth:

				            if (ip->empty() & ~(kEmptyEndLine|kEmptyEndText)) {

				              LOG(DFATAL) << "Unexpected empty-width in short circuit: " << ip->empty();

				              break;

				            }

				            id = ip->out();

				            continue;

				        }

				        break;

				      }

				      break;

				    }

				    if (p > text.end())

				      break;

				    // Start a new thread if there have not been any matches.

				    // (No point in starting a new thread if there have been

				    // matches, since it would be to the right of the match

				    // we already found.)

				    if (!matched_ && (!anchored || p == text.begin())) {

				      // If there's a required first byte for an unanchored search

				      // and we're not in the middle of any possible matches,

				      // use memchr to search for the byte quickly.

				      if (!anchored && first_byte_ >= 0 && runq->size() == 0 &&

				          p < text.end() && (p[0] & 0xFF) != first_byte_) {

				        p = reinterpret_cast<const char*>(memchr(p, first_byte_,

				                                                 text.end() - p));

				        if (p == NULL) {

				          p = text.end();

				          isword = 0;

				        } else {

				          isword = Prog::IsWordChar(p[0] & 0xFF);

				        }

				        // p moved, so the flags computed at the top of the loop

				        // are stale; recompute them for the new position.

				        flag = Prog::EmptyFlags(context, p);

				      }

				      // Steal match storage (cleared but unused as of yet)

				      // temporarily to hold match boundaries for new thread.

				      match_[0] = p;

				      AddToThreadq(runq, start_, flag, p, match_);

				      match_[0] = NULL;

				    }

				    // If all the threads have died, stop early.

				    if (runq->size() == 0) {

				      if (Debug)

				        fprintf(stderr, "dead\n");

				      break;

				    }

				    if (p == text.end())

				      c = 0;

				    else

				      c = *p & 0xFF;

				    wasword = isword;

				    // Will run step(runq, nextq, c, ...) on next iteration.  See above.

				  }

				  // Release any threads still queued when the loop exited.

				  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)

				    FreeThread(i->second);

				  if (matched_) {

				    // Convert the begin/end pointer pairs in match_ into StringPieces.

				    for (int i = 0; i < nsubmatch; i++)

				      submatch[i].set(match_[2*i],

				                      static_cast<int>(match_[2*i+1] - match_[2*i]));

				    if (Debug)

				      fprintf(stderr, "match (%d,%d)\n",

				              static_cast<int>(match_[0] - btext_),

				              static_cast<int>(match_[1] - btext_));

				    return true;

				  }

				  VLOG(1) << "No matches found";

				  return false;

				}

				// Computes whether all successful matches have a common first byte,

				// and if so, returns that byte.  If not, returns -1.

				int NFA::ComputeFirstByte() {

				  if (start_ == 0)

				    return -1;

				  int b = -1;  // first byte, not yet computed

				  typedef SparseSet Workq;

				  Workq q(prog_->size());

				  q.insert(start_);

				  for (Workq::iterator it = q.begin(); it != q.end(); ++it) {

				    int id = *it;

				    Prog::Inst* ip = prog_->inst(id);

				    switch (ip->opcode()) {

				      default:

				        LOG(DFATAL) << "unhandled " << ip->opcode() << " in ComputeFirstByte";

				        break;

				      case kInstMatch:

				        // The empty string matches: no first byte.

				        return -1;

				      case kInstByteRange:

				        // Must match only a single byte

				        if (ip->lo() != ip->hi())

				          return -1;

				        if (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z')

				          return -1;

				        // If we haven't seen any bytes yet, record it;

				        // otherwise must match the one we saw before.

				        if (b == -1)

				          b = ip->lo();

				        else if (b != ip->lo())

				          return -1;

				        break;

				      case kInstNop:

				      case kInstCapture:

				      case kInstEmptyWidth:

				        // Continue on.

				        // Ignore ip->empty() flags for kInstEmptyWidth

				        // in order to be as conservative as possible

				        // (assume all possible empty-width flags are true).

				        if (ip->out())

				          q.insert(ip->out());

				        break;

				      case kInstAlt:

				      case kInstAltMatch:

				        // Explore alternatives.

				        if (ip->out())

				          q.insert(ip->out());

				        if (ip->out1())

				          q.insert(ip->out1());

				        break;

				      case kInstFail:

				        break;

				    }

				  }

				  return b;

				}

				bool

				Prog::SearchNFA(const StringPiece& text, const StringPiece& context,

				                Anchor anchor, MatchKind kind,

				                StringPiece* match, int nmatch) {

				  if (NFA::Debug)

				    Dump();

				  NFA nfa(this);

				  StringPiece sp;

				  if (kind == kFullMatch) {

				    anchor = kAnchored;

				    if (nmatch == 0) {

				      match = &sp;

				      nmatch = 1;

				    }

				  }

				  if (!nfa.Search(text, context, anchor == kAnchored, kind != kFirstMatch, match, nmatch))

				    return false;

				  if (kind == kFullMatch && match[0].end() != text.end())

				    return false;

				  return true;

				}

				// Fills fanout with one entry per instruction discovered from
				// start(): the start instruction itself plus the target of every
				// kInstByteRange encountered.  The value stored for an entry is the
				// number of kInstByteRange instructions reachable from it through
				// empty transitions only.
				//
				// fanout doubles as the work list of the outer loop (entries are
				// appended while it is being walked); seen is the per-entry work
				// list of the inner loop.
				void Prog::Fanout(SparseArray<int>* fanout) {
				  DCHECK_EQ(fanout->max_size(), size());
				  SparseSet seen(size());
				  fanout->clear();
				  fanout->set_new(start(), 0);

				  typedef SparseArray<int>::iterator OuterIt;
				  typedef SparseSet::iterator InnerIt;

				  for (OuterIt outer = fanout->begin(); outer != fanout->end(); ++outer) {
				    int& tally = outer->second;
				    seen.clear();
				    seen.insert(outer->index());
				    for (InnerIt inner = seen.begin(); inner != seen.end(); ++inner) {
				      Prog::Inst* cur = inst(*inner);
				      switch (cur->opcode()) {
				        case kInstMatch:
				        case kInstFail:
				          // Nothing further to follow.
				          break;

				        case kInstCapture:
				        case kInstEmptyWidth:
				        case kInstNop:
				          seen.insert(cur->out());
				          break;

				        case kInstAlt:
				        case kInstAltMatch:
				          // Two empty successors; out1() is queued first.
				          seen.insert(cur->out1());
				          seen.insert(cur->out());
				          break;

				        case kInstByteRange:
				          // Consumes input: count it, and make sure its target
				          // gets an entry of its own in fanout.
				          ++tally;
				          if (!fanout->has_index(cur->out()))
				            fanout->set_new(cur->out(), 0);
				          break;

				        default:
				          LOG(DFATAL) << "unhandled " << cur->opcode() << " in Prog::Fanout()";
				          break;
				      }
				    }
				  }
				}

				}  // namespace re2

									
										611

third_party/re2/re2/onepass.cc
									
										vendored
									
				@ -1,611 +0,0 @@

				// Copyright 2008 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Tested by search_test.cc.

				//

				// Prog::SearchOnePass is an efficient implementation of

				// regular expression search with submatch tracking for

				// what I call "one-pass regular expressions".  (An alternate

				// name might be "backtracking-free regular expressions".)

				//

				// One-pass regular expressions have the property that

				// at each input byte during an anchored match, there may be

				// multiple alternatives but only one can proceed for any

				// given input byte.

				//

				// For example, the regexp /x*yx*/ is one-pass: you read

				// x's until a y, then you read the y, then you keep reading x's.

				// At no point do you have to guess what to do or back up

				// and try a different guess.

				//

				// On the other hand, /x*x/ is not one-pass: when you're

				// looking at an input "x", it's not clear whether you should

				// use it to extend the x* or as the final x.

				//

				// More examples: /([^ ]*) (.*)/ is one-pass; /(.*) (.*)/ is not.

				// /(\d+)-(\d+)/ is one-pass; /(\d+).(\d+)/ is not.

				//

				// A simple intuition for identifying one-pass regular expressions

				// is that it's always immediately obvious when a repetition ends.

				// It must also be immediately obvious which branch of an | to take:

				//

				// /x(y|z)/ is one-pass, but /(xy|xz)/ is not.

				//

				// The NFA-based search in nfa.cc does some bookkeeping to

				// avoid the need for backtracking and its associated exponential blowup.

				// But if we have a one-pass regular expression, there is no

				// possibility of backtracking, so there is no need for the

				// extra bookkeeping.  Hence, this code.

				//

				// On a one-pass regular expression, the NFA code in nfa.cc

				// runs at about 1/20 of the backtracking-based PCRE speed.

				// In contrast, the code in this file runs at about the same

				// speed as PCRE.

				//

				// One-pass regular expressions get used a lot when RE is

				// used for parsing simple strings, so it pays off to

				// notice them and handle them efficiently.

				//

				// See also Anne Brüggemann-Klein and Derick Wood,

				// "One-unambiguous regular languages", Information and Computation 142(2).

				#include <string.h>

				#include <map>

				#include "util/util.h"

				#include "util/sparse_set.h"

				#include "re2/prog.h"

				#include "re2/stringpiece.h"

				namespace re2 {

				static const int Debug = 0;

				// The key insight behind this implementation is that the

				// non-determinism in an NFA for a one-pass regular expression

				// is contained.  To explain what that means, first a

				// refresher about what regular expression programs look like

				// and how the usual NFA execution runs.

				//

				// In a regular expression program, only the kInstByteRange

				// instruction processes an input byte c and moves on to the

				// next byte in the string (it does so if c is in the given range).

				// The kInstByteRange instructions correspond to literal characters

				// and character classes in the regular expression.

				//

				// The kInstAlt instructions are used as wiring to connect the

				// kInstByteRange instructions together in interesting ways when

				// implementing | + and *.

				// The kInstAlt instruction forks execution, like a goto that

				// jumps to ip->out() and ip->out1() in parallel.  Each of the

				// resulting computation paths is called a thread.

				//

				// The other instructions -- kInstEmptyWidth, kInstMatch, kInstCapture --

				// are interesting in their own right but like kInstAlt they don't

				// advance the input pointer.  Only kInstByteRange does.

				//

				// The automaton execution in nfa.cc runs all the possible

				// threads of execution in lock-step over the input.  To process

				// a particular byte, each thread gets run until it either dies

				// or finds a kInstByteRange instruction matching the byte.

				// If the latter happens, the thread stops just past the

				// kInstByteRange instruction (at ip->out()) and waits for

				// the other threads to finish processing the input byte.

				// Then, once all the threads have processed that input byte,

				// the whole process repeats.  The kInstAlt state instruction

				// might create new threads during input processing, but no

				// matter what, all the threads stop after a kInstByteRange

				// and wait for the other threads to "catch up".

				// Running in lock step like this ensures that the NFA reads

				// the input string only once.

				//

				// Each thread maintains its own set of capture registers

				// (the string positions at which it executed the kInstCapture

				// instructions corresponding to capturing parentheses in the

				// regular expression).  Repeated copying of the capture registers

				// is the main performance bottleneck in the NFA implementation.

				//

				// A regular expression program is "one-pass" if, no matter what

				// the input string, there is only one thread that makes it

				// past a kInstByteRange instruction at each input byte.  This means

				// that there is in some sense only one active thread throughout

				// the execution.  Other threads might be created during the

				// processing of an input byte, but they are ephemeral: only one

				// thread is left to start processing the next input byte.

				// This is what I meant above when I said the non-determinism

				// was "contained".

				//

				// To execute a one-pass regular expression program, we can build

				// a DFA (no non-determinism) that has at most as many states as

				// the NFA (compare this to the possibly exponential number of states

				// in the general case).  Each state records, for each possible

				// input byte, the next state along with the conditions required

				// before entering that state -- empty-width flags that must be true

				// and capture operations that must be performed.  It also records

				// the set of conditions required to finish a match at that

				// point in the input rather than process the next byte.

				// A state in the one-pass NFA - just an array of actions indexed

				// by the bytemap_[] of the next input byte.  (The bytemap

				// maps next input bytes into equivalence classes, to reduce

				// the memory footprint.)

				struct OneState {

				  uint32 matchcond;   // conditions to match right now.

				  // Per-byte-class actions.  Declared with a single element, but

				  // each node is allocated with room for bytemap_range_ entries

				  // (statesize is sizeof(OneState) plus bytemap_range_-1 extra

				  // uint32s), so valid indices run from 0 to bytemap_range_-1.

				  // This must stay the last member.

				  uint32 action[1];

				};

				// The uint32 conditions in the action are a combination of

				// condition and capture bits and the next state.  The bottom 16 bits

				// are the condition and capture bits, and the top 16 are the index of

				// the next state.

				//

				// Bits 0-5 are the empty-width flags from prog.h.

				// Bit 6 is kMatchWins, which means the match takes

				// priority over moving to next in a first-match search.

				// The remaining bits mark capture registers that should

				// be set to the current input position.  The capture bits

				// start at index 2, since the search loop can take care of

				// cap[0], cap[1] (the overall match position).

				// That means we can handle up to 5 capturing parens: $1 through $4, plus $0.

				// No input position can satisfy both kEmptyWordBoundary

				// and kEmptyNonWordBoundary, so we can use that as a sentinel

				// instead of needing an extra bit.

				static const int    kIndexShift    = 16;  // number of bits below index

				static const int    kEmptyShift   = 6;  // number of empty flags in prog.h

				// First bit position available for capture bits: just above

				// kMatchWins, which sits at bit kEmptyShift.

				static const int    kRealCapShift = kEmptyShift + 1;

				// Number of capture bits that fit below the index, rounded down to

				// an even count; with the values above this is 8.

				static const int    kRealMaxCap   = (kIndexShift - kRealCapShift) / 2 * 2;

				// Parameters used to skip over cap[0], cap[1].

				// With them, the bit for cap[i] is (1 << kCapShift << i) for

				// i >= 2, and kMaxCap (10 here) is the size of the cap[] arrays.

				static const int    kCapShift     = kRealCapShift - 2;

				static const int    kMaxCap       = kRealMaxCap + 2;

				// Flag bit: the match takes priority over moving to the next state.

				static const uint32 kMatchWins    = 1 << kEmptyShift;

				// Mask selecting all capture bits of an action or matchcond.

				static const uint32 kCapMask      = ((1 << kRealMaxCap) - 1) << kRealCapShift;

				// Sentinel: both word-boundary flags set, which no position satisfies.

				static const uint32 kImpossible   = kEmptyWordBoundary | kEmptyNonWordBoundary;

				// Check, at compile time, that prog.h agrees with math above.

				// This function is never called.

				void OnePass_Checks() {
				  // This function exists only to host compile-time assertions tying
				  // the bit-layout constants above to the definitions in prog.h.

				  // kMaxCap is a count of pointers, whereas kMaxOnePassCapture is a
				  // count of (begin, end) pairs, hence the factor of two.
				  COMPILE_ASSERT(kMaxCap == Prog::kMaxOnePassCapture*2,
				                 kMaxCap_disagrees_with_kMaxOnePassCapture);

				  // The empty-width flags must occupy exactly the low kEmptyShift bits.
				  COMPILE_ASSERT((1<<kEmptyShift)-1 == kEmptyAllFlags,
				                 kEmptyShift_disagrees_with_kEmptyAllFlags);
				}

				// Reports whether every empty-width flag requested in cond holds at
				// position p within context.  Bits of cond outside kEmptyAllFlags
				// are ignored.
				static bool Satisfy(uint32 cond, const StringPiece& context, const char* p) {
				  const uint32 holds = Prog::EmptyFlags(context, p);
				  const uint32 unmet = cond & kEmptyAllFlags & ~holds;
				  return unmet == 0;
				}

				// For every capture register i in [2, ncap) whose bit is set in
				// cond, stores p into cap[i].  Registers 0 and 1 are never touched
				// here.
				static void ApplyCaptures(uint32 cond, const char* p,
				                          const char** cap, int ncap) {
				  int slot = 2;
				  while (slot < ncap) {
				    const uint32 bit = 1 << kCapShift << slot;
				    if ((cond & bit) != 0)
				      cap[slot] = p;
				    ++slot;
				  }
				}

				// Returns a pointer to node number nodeindex in the packed node
				// array starting at nodes, where each node occupies statesize bytes.
				// The volatile qualifier on nodes is cast away in the result.
				static inline OneState* IndexToNode(volatile uint8* nodes, int statesize,
				                                    int nodeindex) {
				  volatile uint8* where = nodes + statesize*nodeindex;
				  uint8* raw = const_cast<uint8*>(where);
				  return reinterpret_cast<OneState*>(raw);
				}

				// Executes the one-pass automaton over text.
				//
				//   text          - the bytes to match against.
				//   const_context - surrounding text for empty-width checks; if its
				//                   begin() is NULL, text is used instead.
				//   anchor, kind  - the search must be anchored or a full match;
				//                   anything else is rejected.
				//   match, nmatch - on success, match[i] for i < nmatch receives
				//                   submatch i.  nmatch must not exceed kMaxCap/2,
				//                   the capacity of the fixed-size capture arrays.
				//
				// Returns true if a match was found.  Assumes the one-pass tables
				// (onepass_start_, onepass_nodes_, onepass_statesize_) have already
				// been built.
				bool Prog::SearchOnePass(const StringPiece& text,
				                         const StringPiece& const_context,
				                         Anchor anchor, MatchKind kind,
				                         StringPiece* match, int nmatch) {
				  if (anchor != kAnchored && kind != kFullMatch) {
				    LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches.";
				    return false;
				  }

				  // cap[] and matchcap[] below are fixed-size stack arrays of kMaxCap
				  // pointers.  Reject requests that would index past them instead of
				  // overrunning the stack.
				  if (nmatch > kMaxCap/2) {
				    LOG(DFATAL) << "Too many submatches for SearchOnePass: nmatch="
				                << nmatch;
				    return false;
				  }

				  // Make sure we have at least cap[1],
				  // because we use it to tell if we matched.
				  int ncap = 2*nmatch;
				  if (ncap < 2)
				    ncap = 2;

				  // cap holds the registers of the thread being run; matchcap holds
				  // a snapshot taken at the most recent match found.
				  const char* cap[kMaxCap];
				  for (int i = 0; i < ncap; i++)
				    cap[i] = NULL;

				  const char* matchcap[kMaxCap];
				  for (int i = 0; i < ncap; i++)
				    matchcap[i] = NULL;

				  StringPiece context = const_context;
				  if (context.begin() == NULL)
				    context = text;
				  if (anchor_start() && context.begin() != text.begin())
				    return false;
				  if (anchor_end() && context.end() != text.end())
				    return false;
				  if (anchor_end())
				    kind = kFullMatch;

				  // State and act are marked volatile to
				  // keep the compiler from re-ordering the
				  // memory accesses walking over the NFA.
				  // This is worth about 5%.
				  volatile OneState* state = onepass_start_;
				  volatile uint8* nodes = onepass_nodes_;
				  volatile uint32 statesize = onepass_statesize_;
				  uint8* bytemap = bytemap_;
				  const char* bp = text.begin();
				  const char* ep = text.end();
				  const char* p;
				  bool matched = false;
				  matchcap[0] = bp;
				  cap[0] = bp;
				  uint32 nextmatchcond = state->matchcond;
				  for (p = bp; p < ep; p++) {
				    int c = bytemap[*p & 0xFF];
				    uint32 matchcond = nextmatchcond;
				    uint32 cond = state->action[c];

				    // Determine whether we can reach act->next.
				    // If so, advance state and nextmatchcond.
				    if ((cond & kEmptyAllFlags) == 0 || Satisfy(cond, context, p)) {
				      uint32 nextindex = cond >> kIndexShift;
				      state = IndexToNode(nodes, statesize, nextindex);
				      nextmatchcond = state->matchcond;
				    } else {
				      state = NULL;
				      nextmatchcond = kImpossible;
				    }

				    // This code section is carefully tuned.
				    // The goto sequence is about 10% faster than the
				    // obvious rewrite as a large if statement in the
				    // ASCIIMatchRE2 and DotMatchRE2 benchmarks.

				    // Saving the match capture registers is expensive.
				    // Is this intermediate match worth thinking about?
				    // Not if we want a full match.
				    if (kind == kFullMatch)
				      goto skipmatch;

				    // Not if it's impossible.
				    if (matchcond == kImpossible)
				      goto skipmatch;

				    // Not if the possible match is beaten by the certain
				    // match at the next byte.  When this test is useless
				    // (e.g., HTTPPartialMatchRE2) it slows the loop by
				    // about 10%, but when it avoids work (e.g., DotMatchRE2),
				    // it cuts the loop execution by about 45%.
				    if ((cond & kMatchWins) == 0 && (nextmatchcond & kEmptyAllFlags) == 0)
				      goto skipmatch;

				    // Finally, the match conditions must be satisfied.
				    if ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p)) {
				      for (int i = 2; i < 2*nmatch; i++)
				        matchcap[i] = cap[i];
				      if (nmatch > 1 && (matchcond & kCapMask))
				        ApplyCaptures(matchcond, p, matchcap, ncap);
				      matchcap[1] = p;
				      matched = true;

				      // If we're in longest match mode, we have to keep
				      // going and see if we find a longer match.
				      // In first match mode, we can stop if the match
				      // takes priority over the next state for this input byte.
				      // That bit is per-input byte and thus in cond, not matchcond.
				      if (kind == kFirstMatch && (cond & kMatchWins))
				        goto done;
				    }

				  skipmatch:
				    // No transition on this byte: stop with whatever match we have.
				    if (state == NULL)
				      goto done;
				    if ((cond & kCapMask) && nmatch > 1)
				      ApplyCaptures(cond, p, cap, ncap);
				  }

				  // Look for match at end of input.
				  // (state is non-NULL here: the loop leaves via goto done
				  // whenever it sets state to NULL.)
				  {
				    uint32 matchcond = state->matchcond;
				    if (matchcond != kImpossible &&
				        ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p))) {
				      if (nmatch > 1 && (matchcond & kCapMask))
				        ApplyCaptures(matchcond, p, cap, ncap);
				      for (int i = 2; i < ncap; i++)
				        matchcap[i] = cap[i];
				      matchcap[1] = p;
				      matched = true;
				    }
				  }

				done:
				  if (!matched)
				    return false;
				  // Convert the saved begin/end pointer pairs into StringPieces.
				  for (int i = 0; i < nmatch; i++)
				    match[i].set(matchcap[2*i],
				                 static_cast<int>(matchcap[2*i+1] - matchcap[2*i]));
				  return true;
				}

				// Analysis to determine whether a given regexp program is one-pass.

				// Work queue of instruction ids.
				typedef SparseSet Instq;

				// Tries to put instruction id on the queue q.
				//   - id == 0: nothing is added, but true is returned (the caller
				//     may treat it as added).
				//   - id already on q: nothing changes, returns false.
				//   - otherwise: id is inserted and true is returned.
				static bool AddQ(Instq *q, int id) {
				  if (id != 0) {
				    if (q->contains(id))
				      return false;
				    q->insert(id);
				  }
				  return true;
				}

				// One entry of the manual stack used while flooding the graph in

				// Prog::IsOnePass(): an instruction still to be examined together

				// with the condition bits accumulated on the path leading to it.

				struct InstCond {

				  int id;       // instruction id, as accepted by Prog::inst()

				  uint32 cond;  // empty-width / capture / kMatchWins bits gathered so far

				};

				// Returns whether this is a one-pass program; that is,

				// returns whether it is safe to use SearchOnePass on this program.

				// These conditions must be true for any instruction ip:

				//

				//   (1) for any other Inst nip, there is at most one input-free

				//       path from ip to nip.

				//   (2) there is at most one kInstByteRange instruction reachable from

				//       ip that matches any particular byte c.

				//   (3) there is at most one input-free path from ip to a kInstMatch

				//       instruction.

				//

				// This is actually just a conservative approximation: it might

				// return false when the answer is true, when kInstEmptyWidth

				// instructions are involved.

				// Constructs and saves corresponding one-pass NFA on success.

				bool Prog::IsOnePass() {

				  if (did_onepass_)

				    return onepass_start_ != NULL;

				  did_onepass_ = true;

				  if (start() == 0)  // no match

				    return false;

				  // Steal memory for the one-pass NFA from the overall DFA budget.

				  // Willing to use at most 1/4 of the DFA budget (heuristic).

				  // Limit max node count to 65000 as a conservative estimate to

				  // avoid overflowing 16-bit node index in encoding.

				  int maxnodes = 2 + byte_inst_count_;

				  int statesize = sizeof(OneState) + (bytemap_range_-1)*sizeof(uint32);

				  if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes)

				    return false;

				  // Flood the graph starting at the start state, and check

				  // that in each reachable state, each possible byte leads

				  // to a unique next state.

				  int size = this->size();

				  InstCond *stack = new InstCond[size];

				  int* nodebyid = new int[size];  // indexed by ip

				  memset(nodebyid, 0xFF, size*sizeof nodebyid[0]);

				  uint8* nodes = new uint8[maxnodes*statesize];

				  uint8* nodep = nodes;

				  Instq tovisit(size), workq(size);

				  AddQ(&tovisit, start());

				  nodebyid[start()] = 0;

				  nodep += statesize;

				  int nalloc = 1;

				  for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {

				    int id = *it;

				    int nodeindex = nodebyid[id];

				    OneState* node = IndexToNode(nodes, statesize, nodeindex);

				    // Flood graph using manual stack, filling in actions as found.

				    // Default is none.

				    for (int b = 0; b < bytemap_range_; b++)

				      node->action[b] = kImpossible;

				    node->matchcond = kImpossible;

				    workq.clear();

				    bool matched = false;

				    int nstack = 0;

				    stack[nstack].id = id;

				    stack[nstack++].cond = 0;

				    while (nstack > 0) {

				      int id = stack[--nstack].id;

				      Prog::Inst* ip = inst(id);

				      uint32 cond = stack[nstack].cond;

				      switch (ip->opcode()) {

				        case kInstAltMatch:

				          // TODO(rsc): Ignoring kInstAltMatch optimization.

				          // Should implement it in this engine, but it's subtle.

				          // Fall through.

				        case kInstAlt:

				          // If already on work queue, (1) is violated: bail out.

				          if (!AddQ(&workq, ip->out()) || !AddQ(&workq, ip->out1()))

				            goto fail;

				          stack[nstack].id = ip->out1();

				          stack[nstack++].cond = cond;

				          stack[nstack].id = ip->out();

				          stack[nstack++].cond = cond;

				          break;

				        case kInstByteRange: {

				          int nextindex = nodebyid[ip->out()];

				          if (nextindex == -1) {

				            if (nalloc >= maxnodes) {

				              if (Debug)

				                LOG(ERROR)

				                  << StringPrintf("Not OnePass: hit node limit %d > %d",

				                                  nalloc, maxnodes);

				              goto fail;

				            }

				            nextindex = nalloc;

				            nodep += statesize;

				            nodebyid[ip->out()] = nextindex;

				            nalloc++;

				            AddQ(&tovisit, ip->out());

				          }

				          if (matched)

				            cond |= kMatchWins;

				          for (int c = ip->lo(); c <= ip->hi(); c++) {

				            int b = bytemap_[c];

				            c = unbytemap_[b];  // last c in byte class

				            uint32 act = node->action[b];

				            uint32 newact = (nextindex << kIndexShift) | cond;

				            if ((act & kImpossible) == kImpossible) {

				              node->action[b] = newact;

				            } else if (act != newact) {

				              if (Debug) {

				                LOG(ERROR)

				                  << StringPrintf("Not OnePass: conflict on byte "

				                                  "%#x at state %d",

				                                  c, *it);

				              }

				              goto fail;

				            }

				          }

				          if (ip->foldcase()) {

				            Rune lo = max<Rune>(ip->lo(), 'a') + 'A' - 'a';

				            Rune hi = min<Rune>(ip->hi(), 'z') + 'A' - 'a';

				            for (int c = lo; c <= hi; c++) {

				              int b = bytemap_[c];

				              c = unbytemap_[b];  // last c in class

				              uint32 act = node->action[b];

				              uint32 newact = (nextindex << kIndexShift) | cond;

				              if ((act & kImpossible) == kImpossible) {

				                node->action[b] = newact;

				              } else if (act != newact) {

				                if (Debug) {

				                  LOG(ERROR)

				                    << StringPrintf("Not OnePass: conflict on byte "

				                                    "%#x at state %d",

				                                    c, *it);

				                }

				                goto fail;

				              }

				            }

				          }

				          break;

				        }

				        case kInstCapture:

				          if (ip->cap() < kMaxCap)

				            cond |= (1 << kCapShift) << ip->cap();

				          goto QueueEmpty;

				        case kInstEmptyWidth:

				          cond |= ip->empty();

				          goto QueueEmpty;

				        case kInstNop:

				        QueueEmpty:

				          // kInstCapture and kInstNop always proceed to ip->out().

				          // kInstEmptyWidth only sometimes proceeds to ip->out(),

				          // but as a conservative approximation we assume it always does.

				          // We could be a little more precise by looking at what c

				          // is, but that seems like overkill.

				          // If already on work queue, (1) is violated: bail out.

				          if (!AddQ(&workq, ip->out())) {

				            if (Debug) {

				              LOG(ERROR) << StringPrintf("Not OnePass: multiple paths"

				                                         " %d -> %d\n",

				                                         *it, ip->out());

				            }

				            goto fail;

				          }

				          stack[nstack].id = ip->out();

				          stack[nstack++].cond = cond;

				          break;

				        case kInstMatch:

				          if (matched) {

				            // (3) is violated

				            if (Debug) {

				              LOG(ERROR) << StringPrintf("Not OnePass: multiple matches"

				                                         " from %d\n", *it);

				            }

				            goto fail;

				          }

				          matched = true;

				          node->matchcond = cond;

				          break;

				        case kInstFail:

				          break;

				      }

				    }

				  }

				  if (Debug) {  // For debugging, dump one-pass NFA to LOG(ERROR).

				    string dump = "prog dump:\n" + Dump() + "node dump\n";

				    map<int, int> idmap;

				    for (int i = 0; i < size; i++)

				      if (nodebyid[i] != -1)

				        idmap[nodebyid[i]] = i;

				    StringAppendF(&dump, "byte ranges:\n");

				    int i = 0;

				    for (int b = 0; b < bytemap_range_; b++) {

				      int lo = i;

				      while (bytemap_[i] == b)

				        i++;

				      StringAppendF(&dump, "\t%d: %#x-%#x\n", b, lo, i - 1);

				    }

				    for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {

				      int id = *it;

				      int nodeindex = nodebyid[id];

				      if (nodeindex == -1)

				      	continue;

				      OneState* node = IndexToNode(nodes, statesize, nodeindex);

				      string s;

				      StringAppendF(&dump, "node %d id=%d: matchcond=%#x\n",

				                    nodeindex, id, node->matchcond);

				      for (int i = 0; i < bytemap_range_; i++) {

				        if ((node->action[i] & kImpossible) == kImpossible)

				          continue;

				        StringAppendF(&dump, "  %d cond %#x -> %d id=%d\n",

				                      i, node->action[i] & 0xFFFF,

				                      node->action[i] >> kIndexShift,

				                      idmap[node->action[i] >> kIndexShift]);

				      }

				    }

				    LOG(ERROR) << dump;

				  }

				  // Overallocated earlier; cut down to actual size.

				  nodep = new uint8[nalloc*statesize];

				  memmove(nodep, nodes, nalloc*statesize);

				  delete[] nodes;

				  nodes = nodep;

				  onepass_start_ = IndexToNode(nodes, statesize, nodebyid[start()]);

				  onepass_nodes_ = nodes;

				  onepass_statesize_ = statesize;

				  dfa_mem_ -= nalloc*statesize;

				  delete[] stack;

				  delete[] nodebyid;

				  return true;

				fail:

				  delete[] stack;

				  delete[] nodebyid;

				  delete[] nodes;

				  return false;

				}

				}  // namespace re2

2270

third_party/re2/re2/parse.cc vendored

File diff suppressed because it is too large Load Diff

									
										119

third_party/re2/re2/perl_groups.cc
									
										vendored
									
				@ -1,119 +0,0 @@

				// GENERATED BY make_perl_groups.pl; DO NOT EDIT.

				// make_perl_groups.pl >perl_groups.cc

				#include "re2/unicode_groups.h"

				namespace re2 {

				// Perl character-class tables (\d, \s, \w).  Each codeN[] array lists

				// inclusive { lo, hi } code-point ranges for the class named in its

				// trailing comment.  NOTE: this file is generated; hand edits are lost

				// when make_perl_groups.pl is re-run.

				static const URange16 code1[] = {  /* \d */

					{ 0x30, 0x39 },

				};

				static const URange16 code2[] = {  /* \s */

					{ 0x9, 0xa },

					{ 0xc, 0xd },

					{ 0x20, 0x20 },

				};

				static const URange16 code3[] = {  /* \w */

					{ 0x30, 0x39 },

					{ 0x41, 0x5a },

					{ 0x5f, 0x5f },

					{ 0x61, 0x7a },

				};

				// One entry per Perl escape.  In every row the second field is +1 for

				// the plain escape and -1 for its upper-case (negated) spelling, the

				// third field is the range table above, and the fourth field equals the

				// number of ranges in that table.

				const UGroup perl_groups[] = {

					{ "\\d", +1, code1, 1 },

					{ "\\D", -1, code1, 1 },

					{ "\\s", +1, code2, 3 },

					{ "\\S", -1, code2, 3 },

					{ "\\w", +1, code3, 4 },

					{ "\\W", -1, code3, 4 },

				};

				// Number of entries in perl_groups[] (six rows above).

				const int num_perl_groups = 6;

				// POSIX bracket-class tables ([:alnum:] ... [:xdigit:]).  Each codeN[]

				// array lists inclusive { lo, hi } code-point ranges for the class named

				// in its trailing comment; all ranges shown are within 0x00-0x7f.

				static const URange16 code4[] = {  /* [:alnum:] */

					{ 0x30, 0x39 },

					{ 0x41, 0x5a },

					{ 0x61, 0x7a },

				};

				static const URange16 code5[] = {  /* [:alpha:] */

					{ 0x41, 0x5a },

					{ 0x61, 0x7a },

				};

				static const URange16 code6[] = {  /* [:ascii:] */

					{ 0x0, 0x7f },

				};

				static const URange16 code7[] = {  /* [:blank:] */

					{ 0x9, 0x9 },

					{ 0x20, 0x20 },

				};

				static const URange16 code8[] = {  /* [:cntrl:] */

					{ 0x0, 0x1f },

					{ 0x7f, 0x7f },

				};

				static const URange16 code9[] = {  /* [:digit:] */

					{ 0x30, 0x39 },

				};

				static const URange16 code10[] = {  /* [:graph:] */

					{ 0x21, 0x7e },

				};

				static const URange16 code11[] = {  /* [:lower:] */

					{ 0x61, 0x7a },

				};

				static const URange16 code12[] = {  /* [:print:] */

					{ 0x20, 0x7e },

				};

				static const URange16 code13[] = {  /* [:punct:] */

					{ 0x21, 0x2f },

					{ 0x3a, 0x40 },

					{ 0x5b, 0x60 },

					{ 0x7b, 0x7e },

				};

				static const URange16 code14[] = {  /* [:space:] */

					{ 0x9, 0xd },

					{ 0x20, 0x20 },

				};

				static const URange16 code15[] = {  /* [:upper:] */

					{ 0x41, 0x5a },

				};

				static const URange16 code16[] = {  /* [:word:] */

					{ 0x30, 0x39 },

					{ 0x41, 0x5a },

					{ 0x5f, 0x5f },

					{ 0x61, 0x7a },

				};

				static const URange16 code17[] = {  /* [:xdigit:] */

					{ 0x30, 0x39 },

					{ 0x41, 0x46 },

					{ 0x61, 0x66 },

				};

				// Two entries per POSIX class: the plain name with +1 and the "^"

				// (negated) name with -1, both pointing at the same range table.  The

				// fourth field equals the number of ranges in that table.

				const UGroup posix_groups[] = {

					{ "[:alnum:]", +1, code4, 3 },

					{ "[:^alnum:]", -1, code4, 3 },

					{ "[:alpha:]", +1, code5, 2 },

					{ "[:^alpha:]", -1, code5, 2 },

					{ "[:ascii:]", +1, code6, 1 },

					{ "[:^ascii:]", -1, code6, 1 },

					{ "[:blank:]", +1, code7, 2 },

					{ "[:^blank:]", -1, code7, 2 },

					{ "[:cntrl:]", +1, code8, 2 },

					{ "[:^cntrl:]", -1, code8, 2 },

					{ "[:digit:]", +1, code9, 1 },

					{ "[:^digit:]", -1, code9, 1 },

					{ "[:graph:]", +1, code10, 1 },

					{ "[:^graph:]", -1, code10, 1 },

					{ "[:lower:]", +1, code11, 1 },

					{ "[:^lower:]", -1, code11, 1 },

					{ "[:print:]", +1, code12, 1 },

					{ "[:^print:]", -1, code12, 1 },

					{ "[:punct:]", +1, code13, 4 },

					{ "[:^punct:]", -1, code13, 4 },

					{ "[:space:]", +1, code14, 2 },

					{ "[:^space:]", -1, code14, 2 },

					{ "[:upper:]", +1, code15, 1 },

					{ "[:^upper:]", -1, code15, 1 },

					{ "[:word:]", +1, code16, 4 },

					{ "[:^word:]", -1, code16, 4 },

					{ "[:xdigit:]", +1, code17, 3 },

					{ "[:^xdigit:]", -1, code17, 3 },

				};

				// Number of entries in posix_groups[] (14 classes x 2 spellings).

				const int num_posix_groups = 28;

				}  // namespace re2

									
										709

third_party/re2/re2/prefilter.cc
									
										vendored
									
				@ -1,709 +0,0 @@

				// Copyright 2009 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				#include "util/util.h"

				#include "re2/prefilter.h"

				#include "re2/re2.h"

				#include "re2/unicode_casefold.h"

				#include "re2/walker-inl.h"

				namespace re2 {

				static const int Trace = false;

				typedef set<string>::iterator SSIter;

				typedef set<string>::const_iterator ConstSSIter;

				GLOBAL_MUTEX(alloc_id_mutex);

				static int alloc_id = 100000;  // Used for debugging.

				// Initializes a Prefilter of kind |op|.  AND and OR nodes get an empty,
				// owned child vector; every other kind leaves subs_ NULL.
				//
				// unique_id_ starts at -1 so that unique_id() never returns an
				// indeterminate value when it is read before set_unique_id() is called.
				// alloc_id_ is a debugging serial number drawn from a mutex-guarded
				// global counter.
				Prefilter::Prefilter(Op op)
				    : op_(op),
				      subs_(NULL),
				      unique_id_(-1) {
				  if (op_ == AND || op_ == OR)
				    subs_ = new vector<Prefilter*>;
				  GLOBAL_MUTEX_LOCK(alloc_id_mutex);
				  alloc_id_ = alloc_id++;
				  GLOBAL_MUTEX_UNLOCK(alloc_id_mutex);
				  VLOG(10) << "alloc_id: " << alloc_id_;
				}

				// Tears down a Prefilter together with every child it still owns.
				Prefilter::~Prefilter() {
				  VLOG(10) << "Deleted: " << alloc_id_;
				  // Leaf kinds never allocate a child vector.
				  if (subs_ == NULL)
				    return;
				  for (vector<Prefilter*>::iterator child = subs_->begin();
				       child != subs_->end(); ++child) {
				    delete *child;
				  }
				  delete subs_;
				  subs_ = NULL;
				}

				// Collapses degenerate AND/OR nodes.  A node with no children becomes
				// ALL (for AND) or NONE (for OR); a node with exactly one child is
				// deleted and replaced by that child, simplified in turn.  Any other
				// node is returned unchanged.  Callers must use the returned pointer,
				// because |this| may have been deleted.
				Prefilter* Prefilter::Simplify() {
				  const bool is_compound = (op_ == AND || op_ == OR);
				  if (!is_compound)
				    return this;

				  switch (subs_->size()) {
				    case 0:
				      // AND of nothing is true; OR of nothing is false.
				      op_ = (op_ == AND) ? ALL : NONE;
				      return this;

				    case 1: {
				      // Detach the lone child before deleting the wrapper so that the
				      // destructor does not delete it as well.
				      Prefilter* only_child = subs_->front();
				      subs_->clear();
				      delete this;
				      return only_child->Simplify();
				    }

				    default:
				      return this;
				  }
				}

				// Builds the combination "a op b", where op is AND or OR.
				// Ownership of both arguments passes to this function: each one either
				// becomes part of the returned Prefilter or is deleted.
				// Tries hard to return the flattest equivalent structure.
				Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) {
				  // Simplify may delete its receiver, so only the results are used below.
				  Prefilter* lhs = a->Simplify();
				  Prefilter* rhs = b->Simplify();

				  // Canonical order: the operand with the smaller opcode goes first.
				  if (lhs->op() > rhs->op()) {
				    Prefilter* tmp = lhs;
				    lhs = rhs;
				    rhs = tmp;
				  }

				  // ALL and NONE are the two smallest opcodes, so after the swap above
				  // only lhs needs to be inspected:
				  //    ALL AND rhs  = rhs        NONE OR rhs  = rhs
				  //    ALL OR rhs   = ALL        NONE AND rhs = NONE
				  const Op lhs_op = lhs->op();
				  if (lhs_op == ALL || lhs_op == NONE) {
				    const bool lhs_is_identity = (lhs_op == ALL) ? (op == AND) : (op == OR);
				    Prefilter* kept = lhs_is_identity ? rhs : lhs;
				    Prefilter* dropped = lhs_is_identity ? lhs : rhs;
				    delete dropped;
				    return kept;
				  }

				  // Both operands already have the requested op: splice rhs's children
				  // onto lhs and discard the now-empty rhs shell.
				  if (lhs->op() == op && rhs->op() == op) {
				    vector<Prefilter*>* donor = rhs->subs();
				    vector<Prefilter*>* target = lhs->subs();
				    target->insert(target->end(), donor->begin(), donor->end());
				    donor->clear();
				    delete rhs;
				    return lhs;
				  }

				  // Exactly one operand has the requested op: append the other to it.
				  if (rhs->op() == op) {
				    rhs->subs()->push_back(lhs);
				    return rhs;
				  }
				  if (lhs->op() == op) {
				    lhs->subs()->push_back(rhs);
				    return lhs;
				  }

				  // Neither operand can absorb the other; wrap both in a fresh node.
				  Prefilter* combined = new Prefilter(op);
				  combined->subs()->push_back(lhs);
				  combined->subs()->push_back(rhs);
				  return combined;
				}

				// Returns the conjunction of a and b by forwarding to AndOr(AND, ...).

				// Both arguments are consumed: each ends up inside the result or deleted.

				Prefilter* Prefilter::And(Prefilter* a, Prefilter* b) {

				  return AndOr(AND, a, b);

				}

				// Returns the disjunction of a and b by forwarding to AndOr(OR, ...).

				// Both arguments are consumed: each ends up inside the result or deleted.

				Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {

				  return AndOr(OR, a, b);

				}

				// Removes from *ss every string that contains an earlier string of the
				// set as a substring.
				static void SimplifyStringSet(set<string> *ss) {
				  // A longer string adds nothing once a substring of it is required:
				  // if "ab" is a required string, then also knowing "abc" does not help,
				  // because matching "ab" already makes the regexp a candidate.  So
				  // "abc" is dropped.
				  for (SSIter needle = ss->begin(); needle != ss->end(); ++needle) {
				    SSIter scan = needle;
				    for (++scan; scan != ss->end(); ) {
				      // Step past the element before possibly erasing it, so that the
				      // scanning iterator stays valid.
				      SSIter current = scan++;
				      if (current->find(*needle) != string::npos)
				        ss->erase(current);
				    }
				  }
				}

				// Builds an OR over ATOM nodes, one per string left in *ss after
				// SimplifyStringSet has pruned it.  Returns NULL when the set is empty.
				// The caller owns the result.
				Prefilter* Prefilter::OrStrings(set<string>* ss) {
				  SimplifyStringSet(ss);
				  if (ss->empty())
				    return NULL;

				  // Start from NONE, the identity for OR, and fold each atom in.
				  Prefilter* result = new Prefilter(NONE);
				  for (SSIter it = ss->begin(); it != ss->end(); ++it)
				    result = Or(result, FromString(*it));
				  return result;
				}

				// Lower-cases a rune.  Runes below Runeself only have 'A'..'Z' shifted
				// to 'a'..'z'; other runes are looked up in the unicode_tolower table
				// and returned unchanged when no fold entry covers them.
				static Rune ToLowerRune(Rune r) {
				  if (r >= Runeself) {
				    const CaseFold *fold =
				        LookupCaseFold(unicode_tolower, num_unicode_tolower, r);
				    if (fold != NULL && r >= fold->lo)
				      return ApplyFold(fold, r);
				    return r;
				  }

				  if ('A' <= r && r <= 'Z')
				    return r + ('a' - 'A');
				  return r;
				}

				// Lower-cases a rune for Latin-1 input: only 'A'..'Z' is shifted to
				// 'a'..'z'; every other value is returned unchanged.
				static Rune ToLowerRuneLatin1(Rune r) {
				  const bool is_ascii_upper = ('A' <= r && r <= 'Z');
				  if (!is_ascii_upper)
				    return r;
				  return r + ('a' - 'A');
				}

				// Returns a new ATOM node whose atom is a copy of |str|.
				// The caller owns the result.
				Prefilter* Prefilter::FromString(const string& str) {
				  Prefilter* leaf = new Prefilter(ATOM);
				  leaf->atom_ = str;
				  return leaf;
				}

				// Information about a regexp used during computation of Prefilter.

				// Can be thought of as information about the set of strings matching

				// the given regular expression.

				//

				// An Info is in one of two states: "exact" (is_exact_ is true and exact_

				// lists the matching strings) or "inexact" (match_ holds a Prefilter

				// query).  TakeMatch() converts an exact Info into an inexact one.

				class Prefilter::Info {

				 public:

				  // A default-constructed Info is inexact and has no match_ yet.

				  Info();

				  // Deletes match_ if it has not been taken.

				  ~Info();

				  // More constructors.  They delete their Info* arguments.

				  static Info* Alt(Info* a, Info* b);

				  static Info* Concat(Info* a, Info* b);

				  static Info* And(Info* a, Info* b);

				  static Info* Star(Info* a);

				  static Info* Plus(Info* a);

				  static Info* Quest(Info* a);

				  // Leaf constructors; each returns a newly allocated Info.

				  static Info* EmptyString();

				  static Info* NoMatch();

				  static Info* AnyChar();

				  static Info* CClass(CharClass* cc, bool latin1);

				  static Info* Literal(Rune r);

				  static Info* LiteralLatin1(Rune r);

				  static Info* AnyMatch();

				  // Format Info as a string.

				  string ToString();

				  // Caller takes ownership of the Prefilter.

				  Prefilter* TakeMatch();

				  // Mutable access to the exact string set.

				  set<string>& exact() { return exact_; }

				  bool is_exact() const { return is_exact_; }

				  // Regexp walker that computes an Info bottom-up; defined below.

				  class Walker;

				 private:

				  set<string> exact_;

				  // When is_exact_ is true, the strings that match

				  // are placed in exact_. When it is no longer an exact

				  // set of strings that match this RE, then is_exact_

				  // is false and the match_ contains the required match

				  // criteria.

				  bool is_exact_;

				  // Accumulated Prefilter query that any

				  // match for this regexp is guaranteed to match.

				  Prefilter* match_;

				};

				// A fresh Info is inexact and carries no match query; exact_ is empty.
				Prefilter::Info::Info() : is_exact_(false), match_(NULL) {}

				// Deletes the match query if it is still held.  After TakeMatch() the

				// pointer is NULL and this delete is a no-op.

				Prefilter::Info::~Info() {

				  delete match_;

				}

				// Hands the match query to the caller, who becomes its owner.
				// An exact Info is first converted: its string set is turned into an OR
				// of atoms by OrStrings and the Info becomes inexact.  The Info holds
				// no query afterwards.  The result may be NULL.
				Prefilter* Prefilter::Info::TakeMatch() {
				  if (is_exact_) {
				    match_ = Prefilter::OrStrings(&exact_);
				    is_exact_ = false;
				  }

				  Prefilter* released = match_;
				  match_ = NULL;
				  return released;
				}

				// Formats an Info as a string: the comma-separated exact strings when
				// the Info is exact, otherwise the debug string of the match query
				// (empty when there is none).
				string Prefilter::Info::ToString() {
				  if (!is_exact_)
				    return match_ ? match_->DebugString() : "";

				  string joined;
				  for (set<string>::iterator it = exact_.begin(); it != exact_.end(); ++it) {
				    if (it != exact_.begin())
				      joined += ",";
				    joined += *it;
				  }
				  return joined;
				}

				// Inserts every string of src into *dst; strings already present in
				// *dst are left as they are.
				static void CopyIn(const set<string>& src, set<string>* dst) {
				  dst->insert(src.begin(), src.end());
				}

				// Add the cross-product of a and b to dst.

				// (For each string i in a and j in b, add i+j.)

				static void CrossProduct(const set<string>& a,

				                         const set<string>& b,

				                         set<string>* dst) {

				  for (ConstSSIter i = a.begin(); i != a.end(); ++i)

				    for (ConstSSIter j = b.begin(); j != b.end(); ++j)

				      dst->insert(*i + *j);

				}

				// Concatenates two exact Infos into a new exact Info whose string set
				// is the cross product of the inputs.  A NULL a means "nothing
				// accumulated yet" and simply yields b.  Otherwise both inputs are
				// deleted.
				Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) {
				  if (a == NULL)
				    return b;

				  DCHECK(a->is_exact_);
				  DCHECK(b && b->is_exact_);

				  Info* product = new Info();
				  CrossProduct(a->exact_, b->exact_, &product->exact_);
				  product->is_exact_ = true;

				  delete a;
				  delete b;
				  return product;
				}

				// Builds an inexact Info for the concatenation ab by AND-ing the match
				// queries of a and b.  Used when either side is inexact or when the
				// exact cross product would probably be too large.  If either argument
				// is NULL the other one is returned untouched; otherwise both are
				// deleted.
				Prefilter::Info* Prefilter::Info::And(Info* a, Info* b) {
				  if (a == NULL || b == NULL)
				    return (a == NULL) ? b : a;

				  Info* both = new Info();
				  both->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch());
				  both->is_exact_ = false;

				  delete a;
				  delete b;
				  return both;
				}

				// Builds the Info for the alternation a|b and deletes both inputs.
				Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {
				  Info* either = new Info();

				  const bool both_exact = a->is_exact_ && b->is_exact_;
				  if (!both_exact) {
				    // At least one side is inexact.  TakeMatch converts an exact side
				    // into a match query, and the two queries are OR-ed together; the
				    // result is inexact.
				    either->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch());
				    either->is_exact_ = false;
				  } else {
				    // Two exact sets: the alternation is exactly their union.
				    CopyIn(a->exact_, &either->exact_);
				    CopyIn(b->exact_, &either->exact_);
				    either->is_exact_ = true;
				  }

				  delete a;
				  delete b;
				  return either;
				}

				// Builds the Info for a? and deletes a.  Nothing from a is kept: the
				// result is an inexact Info whose query is ALL.
				Prefilter::Info* Prefilter::Info::Quest(Info *a) {
				  Info* optional = new Info();
				  optional->is_exact_ = false;
				  optional->match_ = new Prefilter(ALL);

				  delete a;
				  return optional;
				}

				// Constructs Info for a* given a.

				// Same as a? -- not much to do.

				// Forwards to Quest, which deletes a and returns an Info whose match

				// query is ALL.

				Prefilter::Info* Prefilter::Info::Star(Info *a) {

				  return Quest(a);

				}

				// Builds the Info for a+ and deletes a.  The result reuses a's match
				// query and is never exact, even if a was.
				Prefilter::Info* Prefilter::Info::Plus(Info *a) {
				  Info* repeated = new Info();
				  repeated->match_ = a->TakeMatch();
				  repeated->is_exact_ = false;

				  delete a;
				  return repeated;
				}

				// Returns the bytes that runetochar writes for r, as a string.
				static string RuneToString(Rune r) {
				  char encoded[UTFmax];
				  const int length = runetochar(encoded, &r);
				  string result(encoded, length);
				  return result;
				}

				// Returns a one-byte string holding the low eight bits of r.
				static string RuneToStringLatin1(Rune r) {
				  const char byte = r & 0xff;
				  return string(1, byte);
				}

				// Builds an exact Info whose only string is the lower-cased rune r in
				// the encoding produced by RuneToString.
				Prefilter::Info* Prefilter::Info::Literal(Rune r) {
				  Info* literal = new Info();
				  literal->is_exact_ = true;
				  literal->exact_.insert(RuneToString(ToLowerRune(r)));
				  return literal;
				}

				// Builds an exact Info for a literal rune of a Latin-1 encoded regexp:
				// its only string is the single byte of the lower-cased rune.
				Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) {
				  Info* literal = new Info();
				  literal->is_exact_ = true;
				  literal->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
				  return literal;
				}

				// Builds the Info for dot (any character): inexact, with query ALL.
				Prefilter::Info* Prefilter::Info::AnyChar() {
				  Info* any = new Info();
				  any->match_ = new Prefilter(ALL);
				  return any;
				}

				// Builds the Info for a regexp that cannot match: inexact, with query
				// NONE.
				Prefilter::Info* Prefilter::Info::NoMatch() {
				  Info* never = new Info();
				  never->match_ = new Prefilter(NONE);
				  return never;
				}

				// Builds an Info that is valid for any regular expression whatsoever:
				// its query is ALL, so it asserts nothing about the strings being
				// matched.
				Prefilter::Info* Prefilter::Info::AnyMatch() {
				  Info* anything = new Info();
				  anything->match_ = new Prefilter(ALL);
				  return anything;
				}

				// Builds an exact Info whose string set holds only the empty string.
				Prefilter::Info* Prefilter::Info::EmptyString() {
				  Info* empty = new Info();
				  empty->exact_.insert("");
				  empty->is_exact_ = true;
				  return empty;
				}

				// Constructs Prefilter::Info for a character class.

				typedef CharClass::iterator CCIter;

				// Builds the Info for a character class.  When cc->size() exceeds 10
				// the class is over-approximated by AnyChar().  Otherwise the result is
				// exact and holds one lower-cased string per rune in the class, encoded
				// as a single byte when latin1 is set and via RuneToString otherwise.
				Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
				                                         bool latin1) {
				  if (Trace) {
				    VLOG(0) << "CharClassInfo:";
				    for (CCIter range = cc->begin(); range != cc->end(); ++range)
				      VLOG(0) << "  " << range->lo << "-" << range->hi;
				  }

				  // If the class is too large, it's okay to overestimate.
				  if (cc->size() > 10)
				    return AnyChar();

				  Prefilter::Info* members = new Prefilter::Info();
				  for (CCIter range = cc->begin(); range != cc->end(); ++range) {
				    for (Rune r = range->lo; r <= range->hi; r++) {
				      const string text = latin1
				          ? RuneToStringLatin1(ToLowerRuneLatin1(r))
				          : RuneToString(ToLowerRune(r));
				      members->exact_.insert(text);
				    }
				  }
				  members->is_exact_ = true;

				  if (Trace) {
				    VLOG(0) << " = " << members->ToString();
				  }
				  return members;
				}

				// Regexp walker that computes a Prefilter::Info for each node.
				// latin1 selects the Latin-1 variants of the literal and character-class
				// constructors in PostVisit.
				class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
				 public:
				  // explicit: a bare bool must never convert silently into a Walker.
				  explicit Walker(bool latin1) : latin1_(latin1) {}

				  // Combines the Infos computed for re's children into the Info for re.
				  virtual Info* PostVisit(
				      Regexp* re, Info* parent_arg,
				      Info* pre_arg,
				      Info** child_args, int nchild_args);

				  // Returns a placeholder Info for a node that is not walked in full.
				  virtual Info* ShortVisit(
				      Regexp* re,
				      Info* parent_arg);

				  // True when the walker was created for Latin-1 input.
				  bool latin1() const { return latin1_; }

				 private:
				  bool latin1_;
				  DISALLOW_COPY_AND_ASSIGN(Walker);
				};

				// Walks re and returns the Info computed for it, which the caller owns.
				// Returns NULL, after deleting the partial result, when the walker
				// reports that it stopped early.  The walk is given a budget of 100000.
				Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {
				  if (Trace) {
				    LOG(INFO) << "BuildPrefilter::Info: " << re->ToString();
				  }

				  const bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0;
				  Prefilter::Info::Walker walker(latin1);
				  Prefilter::Info* result = walker.WalkExponential(re, NULL, 100000);

				  if (!walker.stopped_early())
				    return result;

				  delete result;
				  return NULL;
				}

				// Returns AnyMatch(), an Info that asserts nothing, ignoring both

				// arguments.  NOTE(review): presumably the base walker calls this for

				// nodes it does not walk in full -- confirm in walker-inl.h.

				Prefilter::Info* Prefilter::Info::Walker::ShortVisit(

				    Regexp* re, Prefilter::Info* parent_arg) {

				  return AnyMatch();

				}

				// Constructs the Prefilter::Info for the given regular expression.

				// Assumes re is simplified.

				//

				// child_args holds the Infos already computed for re's nchild_args

				// children.  The Info combinators used below delete the Infos passed to

				// them, so each child Info is either consumed or returned as the result.

				// parent_arg and pre_arg are not used.

				Prefilter::Info* Prefilter::Info::Walker::PostVisit(

				    Regexp* re, Prefilter::Info* parent_arg,

				    Prefilter::Info* pre_arg, Prefilter::Info** child_args,

				    int nchild_args) {

				  Prefilter::Info *info;

				  switch (re->op()) {

				    // kRegexpRepeat and unknown ops are reported via LOG(DFATAL); the

				    // code then falls back to EmptyString().

				    default:

				    case kRegexpRepeat:

				      LOG(DFATAL) << "Bad regexp op " << re->op();

				      info = EmptyString();

				      break;

				    case kRegexpNoMatch:

				      info = NoMatch();

				      break;

				    // These ops match the empty string:

				    case kRegexpEmptyMatch:      // anywhere

				    case kRegexpBeginLine:       // at beginning of line

				    case kRegexpEndLine:         // at end of line

				    case kRegexpBeginText:       // at beginning of text

				    case kRegexpEndText:         // at end of text

				    case kRegexpWordBoundary:    // at word boundary

				    case kRegexpNoWordBoundary:  // not at word boundary

				      info = EmptyString();

				      break;

				    case kRegexpLiteral:

				      if (latin1()) {

				        info = LiteralLatin1(re->rune());

				      }

				      else {

				        info = Literal(re->rune());

				      }

				      break;

				    // A literal string is the concatenation of its runes; a string with

				    // zero runes is mapped to NoMatch().

				    case kRegexpLiteralString:

				      if (re->nrunes() == 0) {

				        info = NoMatch();

				        break;

				      }

				      if (latin1()) {

				        info = LiteralLatin1(re->runes()[0]);

				        for (int i = 1; i < re->nrunes(); i++) {

				          info = Concat(info, LiteralLatin1(re->runes()[i]));

				        }

				      } else {

				        info = Literal(re->runes()[0]);

				        for (int i = 1; i < re->nrunes(); i++) {

				          info = Concat(info, Literal(re->runes()[i]));

				        }

				      }

				      break;

				    case kRegexpConcat: {

				      // Accumulate in info.

				      // Exact is concat of recent contiguous exact nodes.

				      info = NULL;

				      Info* exact = NULL;

				      for (int i = 0; i < nchild_args; i++) {

				        Info* ci = child_args[i];  // child info

				        // The exact run ends at an inexact child, or when extending it

				        // would produce a cross product of more than 16 strings.

				        if (!ci->is_exact() ||

				            (exact && ci->exact().size() * exact->exact().size() > 16)) {

				          // Exact run is over.

				          info = And(info, exact);

				          exact = NULL;

				          // Add this child's info.

				          info = And(info, ci);

				        } else {

				          // Append to exact run.

				          exact = Concat(exact, ci);

				        }

				      }

				      // Fold in whatever exact run is still pending (And accepts NULL).

				      info = And(info, exact);

				    }

				      break;

				    // Alternation: fold the children together left to right with Alt.

				    case kRegexpAlternate:

				      info = child_args[0];

				      for (int i = 1; i < nchild_args; i++)

				        info = Alt(info, child_args[i]);

				      VLOG(10) << "Alt: " << info->ToString();

				      break;

				    case kRegexpStar:

				      info = Star(child_args[0]);

				      break;

				    case kRegexpQuest:

				      info = Quest(child_args[0]);

				      break;

				    case kRegexpPlus:

				      info = Plus(child_args[0]);

				      break;

				    case kRegexpAnyChar:

				      // Claim nothing, except that it's not empty.

				      info = AnyChar();

				      break;

				    case kRegexpCharClass:

				      info = CClass(re->cc(), latin1());

				      break;

				    case kRegexpCapture:

				      // These don't affect the set of matching strings.

				      info = child_args[0];

				      break;

				  }

				  if (Trace) {

				    VLOG(0) << "BuildInfo " << re->ToString()

				            << ": " << (info ? info->ToString() : "");

				  }

				  return info;

				}

				// Computes the Prefilter for re; the caller owns the result.
				// Returns NULL when re is NULL or when BuildInfo yields no Info.  The
				// result is also NULL when the Info's TakeMatch() returns NULL.
				Prefilter* Prefilter::FromRegexp(Regexp* re) {
				  if (re == NULL)
				    return NULL;

				  // The simplified copy is only needed while the Info is being built.
				  Regexp* simplified = re->Simplify();
				  Prefilter::Info* built = BuildInfo(simplified);
				  simplified->Decref();

				  Prefilter* result = NULL;
				  if (built != NULL) {
				    result = built->TakeMatch();
				    delete built;
				  }
				  return result;
				}

				// Returns a readable rendering of this Prefilter:
				//   NONE -> "*no-matches*", ALL -> "", ATOM -> the atom itself,
				//   AND  -> children separated by " ",
				//   OR   -> children separated by "|" and wrapped in parentheses.
				// A NULL child is rendered as "<nil>".  An unknown op is logged with
				// LOG(DFATAL) and rendered as "op<N>".
				string Prefilter::DebugString() const {
				  switch (op_) {
				    case NONE:
				      return "*no-matches*";

				    case ALL:
				      return "";

				    case ATOM:
				      return atom_;

				    case AND:
				    case OR: {
				      // The two compound kinds differ only in separator and brackets.
				      const bool is_or = (op_ == OR);
				      const char* separator = is_or ? "|" : " ";
				      string text = is_or ? "(" : "";
				      for (size_t i = 0; i < subs_->size(); i++) {
				        if (i > 0)
				          text += separator;
				        const Prefilter* child = (*subs_)[i];
				        text += child ? child->DebugString() : "<nil>";
				      }
				      if (is_or)
				        text += ")";
				      return text;
				    }

				    default:
				      LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
				      return StringPrintf("op%d", op_);
				  }
				}

				// Returns the Prefilter for re2's parsed regexp; the caller owns it.
				// Returns NULL when re2 is NULL, when it has no parsed regexp, or when
				// FromRegexp returns NULL.
				Prefilter* Prefilter::FromRE2(const RE2* re2) {
				  Regexp* parsed = (re2 != NULL) ? re2->Regexp() : NULL;
				  return (parsed != NULL) ? FromRegexp(parsed) : NULL;
				}

				}  // namespace re2

									
										105

third_party/re2/re2/prefilter.h
									
										vendored
									
				@ -1,105 +0,0 @@

				// Copyright 2009 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Prefilter is the class used to extract string guards from regexps.

				// Rather than using Prefilter class directly, use FilteredRE2.

				// See filtered_re2.h

				#ifndef RE2_PREFILTER_H_

				#define RE2_PREFILTER_H_

				#include "util/util.h"

				namespace re2 {

				class RE2;

				class Regexp;

				// A node of the boolean query ("string guard") extracted from a regexp.
				// Leaves are ALL, NONE or ATOM; AND and OR nodes own a vector of child
				// nodes.
				class Prefilter {
				  // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h
				 public:
				  enum Op {
				    ALL = 0,  // Everything matches
				    NONE,  // Nothing matches
				    ATOM,  // The string atom() must match
				    AND,   // All in subs() must match
				    OR,   // One of subs() must match
				  };

				  // Creates a node of the given kind; AND/OR nodes start with an empty
				  // child vector.
				  explicit Prefilter(Op op);

				  // Deletes the node and every child it still owns.
				  ~Prefilter();

				  // Kind of this node.  Const, like the other accessors, so that it can
				  // be called through a const Prefilter.
				  Op op() const { return op_; }

				  // The string a leaf ATOM node requires.
				  const string& atom() const { return atom_; }

				  void set_unique_id(int id) { unique_id_ = id; }
				  int unique_id() const { return unique_id_; }

				  // The children of the Prefilter node.
				  // CHECK-fails unless the node is an AND or an OR.
				  vector<Prefilter*>* subs() {
				    CHECK(op_ == AND || op_ == OR);
				    return subs_;
				  }

				  // Set the children vector. Prefilter takes ownership of subs and
				  // subs_ will be deleted when Prefilter is deleted.
				  void set_subs(vector<Prefilter*>* subs) { subs_ = subs; }

				  // Given a RE2, return a Prefilter. The caller takes ownership of
				  // the Prefilter and should deallocate it. Returns NULL if Prefilter
				  // cannot be formed.
				  static Prefilter* FromRE2(const RE2* re2);

				  // Returns a readable debug string of the prefilter.
				  string DebugString() const;

				 private:
				  // Per-regexp working state used while a Prefilter is being computed.
				  class Info;

				  // Combines two prefilters together to create an AND. The passed
				  // Prefilters will be part of the returned Prefilter or deleted.
				  static Prefilter* And(Prefilter* a, Prefilter* b);

				  // Combines two prefilters together to create an OR. The passed
				  // Prefilters will be part of the returned Prefilter or deleted.
				  static Prefilter* Or(Prefilter* a, Prefilter* b);

				  // Generalized And/Or
				  static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b);

				  // Builds the Prefilter for a parsed regexp; NULL if none can be formed.
				  static Prefilter* FromRegexp(Regexp* a);

				  // Returns a new ATOM node for str.
				  static Prefilter* FromString(const string& str);

				  // Returns an OR over the strings in ss; ss may be modified.
				  static Prefilter* OrStrings(set<string>* ss);

				  // Computes the Info for re.
				  static Info* BuildInfo(Regexp* re);

				  // Collapses an AND/OR with zero or one child.  May delete this node,
				  // so callers must continue with the returned pointer.
				  Prefilter* Simplify();

				  // Kind of Prefilter.
				  Op op_;

				  // Sub-matches for AND or OR Prefilter.
				  vector<Prefilter*>* subs_;

				  // Actual string to match in leaf node.
				  string atom_;

				  // If different prefilters have the same string atom, or if they are
				  // structurally the same (e.g., OR of same atom strings) they are
				  // considered the same unique nodes. This is the id for each unique
				  // node. This field is populated with a unique id for every node,
				  // and -1 for duplicate nodes.
				  int unique_id_;

				  // Used for debugging, helps in tracking memory leaks.
				  int alloc_id_;

				  DISALLOW_COPY_AND_ASSIGN(Prefilter);
				};

				}  // namespace re2

				#endif  // RE2_PREFILTER_H_

									
										402

third_party/re2/re2/prefilter_tree.cc
									
										vendored
									
				@ -1,402 +0,0 @@

				// Copyright 2009 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				#include "util/util.h"

				#include "util/flags.h"

				#include "re2/prefilter.h"

				#include "re2/prefilter_tree.h"

				#include "re2/re2.h"

				// Flag holding the minimum atom length (default 3). KeepPart() compares an
				// ATOM prefilter's string length against this value and rejects shorter ones.
				DEFINE_int32(filtered_re2_min_atom_len,

				             3,

				             "Strings less than this length are not stored as atoms");

				namespace re2 {

				// Creates an empty tree that is marked as not yet compiled.
				PrefilterTree::PrefilterTree() : compiled_(false) {}

				// Deletes every Prefilter stored in prefilter_vec_, then the parents map
				// held by each entry.
				PrefilterTree::~PrefilterTree() {
				  size_t idx = 0;
				  while (idx < prefilter_vec_.size()) {
				    delete prefilter_vec_[idx];
				    ++idx;
				  }

				  idx = 0;
				  while (idx < entries_.size()) {
				    delete entries_[idx].parents;
				    ++idx;
				  }
				}

				// Functions used for adding and Compiling prefilters to the

				// PrefilterTree.

				// Decides whether |prefilter| should be kept.
				//  - NULL, ALL and unexpected ops: not kept.
				//  - ATOM: kept when the atom is at least FLAGS_filtered_re2_min_atom_len
				//    characters long.
				//  - AND: children that are not kept are deleted and removed from subs();
				//    the node is kept if at least one child remains.
				//  - OR: kept only if every child is kept; checking stops at the first
				//    child that is not.
				// |level| is the recursion depth; it is only passed on to recursive calls.
				static bool KeepPart(Prefilter* prefilter, int level) {
				  if (prefilter == NULL)
				    return false;

				  switch (prefilter->op()) {
				    case Prefilter::ATOM: {
				      const size_t min_len =
				          static_cast<size_t>(FLAGS_filtered_re2_min_atom_len);
				      return prefilter->atom().size() >= min_len;
				    }

				    case Prefilter::AND: {
				      vector<Prefilter*>* children = prefilter->subs();
				      int kept = 0;
				      for (size_t i = 0; i < children->size(); i++) {
				        Prefilter* child = (*children)[i];
				        if (!KeepPart(child, level + 1)) {
				          delete child;
				          continue;
				        }
				        // Compact the surviving children towards the front.
				        (*children)[kept++] = child;
				      }
				      children->resize(kept);
				      return kept > 0;
				    }

				    case Prefilter::OR: {
				      for (size_t i = 0; i < prefilter->subs()->size(); i++) {
				        Prefilter* child = (*prefilter->subs())[i];
				        if (!KeepPart(child, level + 1))
				          return false;
				      }
				      return true;
				    }

				    case Prefilter::ALL:
				      return false;

				    default:
				      LOG(DFATAL) << "Unexpected op in KeepPart: "
				                  << prefilter->op();
				      return false;
				  }
				}

				// Appends the prefilter of the next regexp. A prefilter rejected by
				// KeepPart() is deleted and NULL is stored in its place, so the position in
				// prefilter_vec_ still corresponds to the regexp. Logs and does nothing if
				// the tree is already compiled.
				void PrefilterTree::Add(Prefilter *f) {
				  if (compiled_) {
				    LOG(DFATAL) << "Add after Compile.";
				    return;
				  }

				  if (f != NULL) {
				    const bool keep = KeepPart(f, 0);
				    if (!keep) {
				      delete f;
				      f = NULL;
				    }
				  }

				  prefilter_vec_.push_back(f);
				}

				// Finalizes the tree: assigns unique ids (filling |atom_vec| with the atom
				// strings), then drops triggers that fan out to too many parents when
				// every one of those parents is guarded by something else.
				void PrefilterTree::Compile(vector<string>* atom_vec) {
				  if (compiled_) {
				    LOG(DFATAL) << "Compile after Compile.";
				    return;
				  }

				  // Some legacy callers invoke Compile before adding any regexps and
				  // expect it to have no effect, so bail out without marking the tree
				  // as compiled.
				  if (prefilter_vec_.empty())
				    return;

				  compiled_ = true;
				  AssignUniqueIds(atom_vec);

				  // Look for nodes that trigger too many parents and detach them where
				  // that is safe. Detaching a node only means its parents no longer wait
				  // for it; no regexp stops being triggered because of it.
				  for (size_t i = 0; i < entries_.size(); i++) {
				    StdIntMap* parents = entries_[i].parents;
				    // TODO(vsri): Adjust the threshold appropriately,
				    // make it a function of total number of nodes?
				    if (parents->size() <= 8)
				      continue;

				    // Safe only if each parent needs more than one child to fire.
				    bool every_parent_guarded = true;
				    for (StdIntMap::iterator it = parents->begin();
				         it != parents->end(); ++it) {
				      if (entries_[it->first].propagate_up_at_count <= 1) {
				        every_parent_guarded = false;
				        break;
				      }
				    }
				    if (!every_parent_guarded)
				      continue;

				    for (StdIntMap::iterator it = parents->begin();
				         it != parents->end(); ++it) {
				      entries_[it->first].propagate_up_at_count -= 1;
				    }
				    parents->clear();  // Forget the parents
				  }

				  PrintDebugInfo();
				}

				// Looks up the node registered in node_map_ under the NodeString of |node|.
				// Returns NULL when no node with that string has been registered.
				Prefilter* PrefilterTree::CanonicalNode(Prefilter* node) {
				  map<string, Prefilter*>::iterator found =
				      node_map_.find(NodeString(node));
				  if (found != node_map_.end())
				    return found->second;
				  return NULL;
				}

				// Returns the decimal text representation of |n|.
				static string Itoa(int n) {
				  char digits[100];
				  snprintf(digits, sizeof(digits), "%d", n);
				  string text(digits);
				  return text;
				}

				// Builds the lookup key for |node|: "<op>:" followed by the atom text for
				// an ATOM node, or by the comma-separated unique ids of the children
				// otherwise.
				string PrefilterTree::NodeString(Prefilter* node) const {
				  // The op prefix keeps AND/OR/atom nodes from colliding.
				  string key = Itoa(node->op());
				  key += ":";

				  if (node->op() == Prefilter::ATOM) {
				    key += node->atom();
				    return key;
				  }

				  for (size_t i = 0; i < node->subs()->size(); i++) {
				    if (i != 0)
				      key += ',';
				    Prefilter* child = (*node->subs())[i];
				    key += Itoa(child->unique_id());
				  }
				  return key;
				}

				// Assigns a unique id to every structurally distinct prefilter node, fills
				// |atom_vec| with the strings of the distinct atoms (and atom_index_to_id_
				// with their ids), and builds entries_: per unique node, its parents, the
				// number of children needed to trigger it, and the regexps it triggers.
				void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
				  atom_vec->clear();

				  // List all nodes top-down. The first prefilter_vec_.size() slots are the
				  // top-level nodes, NULLs included, so that index == regexp id there.
				  vector<Prefilter*> nodes;
				  for (size_t i = 0; i < prefilter_vec_.size(); i++) {
				    Prefilter* top = prefilter_vec_[i];
				    if (top == NULL)
				      unfiltered_.push_back(static_cast<int>(i));
				    nodes.push_back(top);
				  }

				  // Append descendants; |nodes| grows while it is being scanned.
				  for (size_t i = 0; i < nodes.size(); i++) {
				    Prefilter* cur = nodes[i];
				    if (cur == NULL)
				      continue;
				    if (cur->op() != Prefilter::AND && cur->op() != Prefilter::OR)
				      continue;
				    const vector<Prefilter*>& kids = *cur->subs();
				    for (size_t j = 0; j < kids.size(); j++)
				      nodes.push_back(kids[j]);
				  }

				  // Walk bottom-up so children get their ids before their parents.
				  int next_id = 0;
				  for (int i = static_cast<int>(nodes.size()) - 1; i >= 0; i--) {
				    Prefilter* node = nodes[i];
				    if (node == NULL)
				      continue;

				    node->set_unique_id(-1);
				    Prefilter* canonical = CanonicalNode(node);
				    if (canonical != NULL) {
				      // Duplicate of an already registered node: share its id.
				      node->set_unique_id(canonical->unique_id());
				      continue;
				    }

				    // First node with this node string; later equal nodes will find it
				    // as their canonical node.
				    node_map_[NodeString(node)] = node;
				    if (node->op() == Prefilter::ATOM) {
				      atom_vec->push_back(node->atom());
				      atom_index_to_id_.push_back(next_id);
				    }
				    node->set_unique_id(next_id++);
				  }

				  entries_.resize(node_map_.size());

				  // Give every canonical node's entry an empty parent map.
				  for (int i = static_cast<int>(nodes.size()) - 1; i >= 0; i--) {
				    Prefilter* node = nodes[i];
				    if (node == NULL || CanonicalNode(node) != node)
				      continue;
				    entries_[node->unique_id()].parents = new StdIntMap();
				  }

				  // Fill in trigger counts and register each node with its children.
				  for (int i = static_cast<int>(nodes.size()) - 1; i >= 0; i--) {
				    Prefilter* node = nodes[i];
				    if (node == NULL || CanonicalNode(node) != node)
				      continue;

				    Entry* entry = &entries_[node->unique_id()];
				    switch (node->op()) {
				      case Prefilter::ATOM:
				        entry->propagate_up_at_count = 1;
				        break;

				      case Prefilter::OR:
				      case Prefilter::AND: {
				        set<int> distinct_children;
				        for (size_t j = 0; j < node->subs()->size(); j++) {
				          Prefilter* canonical = CanonicalNode((*node->subs())[j]);
				          if (canonical == NULL) {
				            LOG(DFATAL) << "Null canonical node";
				            return;
				          }
				          const int child_id = canonical->unique_id();
				          distinct_children.insert(child_id);

				          // Record this node as a parent of the child, once.
				          Entry* child_entry = &entries_[child_id];
				          if (child_entry->parents->find(node->unique_id()) ==
				              child_entry->parents->end()) {
				            (*child_entry->parents)[node->unique_id()] = 1;
				          }
				        }
				        // AND needs all distinct children; OR needs just one.
				        if (node->op() == Prefilter::AND) {
				          entry->propagate_up_at_count =
				              static_cast<int>(distinct_children.size());
				        } else {
				          entry->propagate_up_at_count = 1;
				        }
				        break;
				      }

				      case Prefilter::ALL:
				      default:
				        LOG(DFATAL) << "Unexpected op: " << node->op();
				        return;
				    }
				  }

				  // Attach each regexp id to the entry of its top-level node.
				  for (size_t i = 0; i < prefilter_vec_.size(); i++) {
				    Prefilter* top = prefilter_vec_[i];
				    if (top == NULL)
				      continue;
				    int id = CanonicalNode(top)->unique_id();
				    DCHECK_LE(0, id);
				    entries_[id].regexps.push_back(static_cast<int>(i));
				  }
				}

				// Functions for triggering during search.

				void PrefilterTree::RegexpsGivenStrings(

				    const vector<int>& matched_atoms,

				    vector<int>* regexps) const {

				  regexps->clear();

				  if (!compiled_) {

				    LOG(WARNING) << "Compile() not called";

				    for (size_t i = 0; i < prefilter_vec_.size(); ++i)

				      regexps->push_back(static_cast<int>(i));

				  } else {

				    if (!prefilter_vec_.empty()) {

				      IntMap regexps_map(static_cast<int>(prefilter_vec_.size()));

				      vector<int> matched_atom_ids;

				      for (size_t j = 0; j < matched_atoms.size(); j++) {

				        matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]);

				        VLOG(10) << "Atom id:" << atom_index_to_id_[matched_atoms[j]];

				      }

				      PropagateMatch(matched_atom_ids, &regexps_map);

				      for (IntMap::iterator it = regexps_map.begin();

				           it != regexps_map.end();

				           ++it)

				        regexps->push_back(it->index());

				      regexps->insert(regexps->end(), unfiltered_.begin(), unfiltered_.end());

				    }

				  }

				  sort(regexps->begin(), regexps->end());

				}

				void PrefilterTree::PropagateMatch(const vector<int>& atom_ids,

				                                   IntMap* regexps) const {

				  IntMap count(static_cast<int>(entries_.size()));

				  IntMap work(static_cast<int>(entries_.size()));

				  for (size_t i = 0; i < atom_ids.size(); i++)

				    work.set(atom_ids[i], 1);

				  for (IntMap::iterator it = work.begin(); it != work.end(); ++it) {

				    const Entry& entry = entries_[it->index()];

				    VLOG(10) << "Processing: " << it->index();

				    // Record regexps triggered.

				    for (size_t i = 0; i < entry.regexps.size(); i++) {

				      VLOG(10) << "Regexp triggered: " << entry.regexps[i];

				      regexps->set(entry.regexps[i], 1);

				    }

				    int c;

				    // Pass trigger up to parents.

				    for (StdIntMap::iterator it = entry.parents->begin();

				         it != entry.parents->end();

				         ++it) {

				      int j = it->first;

				      const Entry& parent = entries_[j];

				      VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count;

				      // Delay until all the children have succeeded.

				      if (parent.propagate_up_at_count > 1) {

				        if (count.has_index(j)) {

				          c = count.get_existing(j) + 1;

				          count.set_existing(j, c);

				        } else {

				          c = 1;

				          count.set_new(j, c);

				        }

				        if (c < parent.propagate_up_at_count)

				          continue;

				      }

				      VLOG(10) << "Triggering: " << j;

				      // Trigger the parent.

				      work.set(j, 1);

				    }

				  }

				}

				// Debugging help.

				void PrefilterTree::PrintPrefilter(int regexpid) {

				  LOG(INFO) << DebugNodeString(prefilter_vec_[regexpid]);

				}

				void PrefilterTree::PrintDebugInfo() {

				  VLOG(10) << "#Unique Atoms: " << atom_index_to_id_.size();

				  VLOG(10) << "#Unique Nodes: " << entries_.size();

				  for (size_t i = 0; i < entries_.size(); ++i) {

				    StdIntMap* parents = entries_[i].parents;

				    const vector<int>& regexps = entries_[i].regexps;

				    VLOG(10) << "EntryId: " << i

				            << " N: " << parents->size() << " R: " << regexps.size();

				    for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)

				      VLOG(10) << it->first;

				  }

				  VLOG(10) << "Map:";

				  for (map<string, Prefilter*>::const_iterator iter = node_map_.begin();

				       iter != node_map_.end(); ++iter)

				    VLOG(10) << "NodeId: " << (*iter).second->unique_id()

				            << " Str: " << (*iter).first;

				}

				// Recursively renders |node| for debugging: the atom text for an ATOM node,
				// otherwise "AND(...)" or "OR(...)" listing each child as
				// "<unique id>:<child rendering>", separated by commas.
				string PrefilterTree::DebugNodeString(Prefilter* node) const {
				  if (node->op() == Prefilter::ATOM) {
				    DCHECK(!node->atom().empty());
				    string atom_text = "";
				    atom_text += node->atom();
				    return atom_text;
				  }

				  // The operation name tells AND and OR nodes apart.
				  string text = "";
				  text += node->op() == Prefilter::AND ? "AND" : "OR";
				  text += "(";
				  for (size_t i = 0; i < node->subs()->size(); i++) {
				    Prefilter* child = (*node->subs())[i];
				    if (i != 0)
				      text += ',';
				    text += Itoa(child->unique_id());
				    text += ":";
				    text += DebugNodeString(child);
				  }
				  text += ")";
				  return text;
				}

				}  // namespace re2

									
										131

third_party/re2/re2/prefilter_tree.h
									
										vendored
									
				@ -1,131 +0,0 @@

				// Copyright 2009 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// The PrefilterTree class is used to form an AND-OR tree of strings

				// that would trigger each regexp. The 'prefilter' of each regexp is

				// added to PrefilterTree, and then PrefilterTree is used to find all

				// the unique strings across the prefilters. During search, by using

				// matches from a string matching engine, PrefilterTree deduces the

				// set of regexps that are to be triggered. The 'string matching

				// engine' itself is outside of this class, and the caller can use any

				// favorite engine. PrefilterTree provides a set of strings (called

				// atoms) that the user of this class should use to do the string

				// matching.

				//

				#ifndef RE2_PREFILTER_TREE_H_

				#define RE2_PREFILTER_TREE_H_

				#include "util/util.h"

				#include "util/sparse_array.h"

				namespace re2 {

				typedef SparseArray<int> IntMap;

				typedef map<int, int> StdIntMap;

				class Prefilter;

				// Collects the prefilters of a sequence of regexps, exposes the distinct
				// atom strings through Compile(), and maps a set of matched atoms back to
				// the regexps that need to be run through RegexpsGivenStrings().
				class PrefilterTree {

				 public:

				  PrefilterTree();

				  ~PrefilterTree();

				  // Adds the prefilter for the next regexp. Add must be called once per

				  // regexp, in regexp order, and all Add calls must precede Compile.

				  // A NULL prefilter is accepted; that regexp is treated as unfiltered.

				  void Add(Prefilter* prefilter);

				  // Compile returns the vector of atom strings in atom_vec.

				  // Call this after all the prefilters are added through Add.

				  // No calls to Add after Compile are allowed.

				  // The caller should use the returned set of strings to do string matching.

				  // Each time a string matches, the corresponding index has to be

				  // collected and passed to RegexpsGivenStrings below.

				  void Compile(vector<string>* atom_vec);

				  // Given the indices of the atoms that matched, returns the indexes

				  // of regexps that should be searched.  The matched_atoms should

				  // contain all the ids of string atoms that were found to match the

				  // content. The caller can use any string match engine to perform

				  // this function. This function is thread safe.

				  void RegexpsGivenStrings(const vector<int>& matched_atoms,

				                           vector<int>* regexps) const;

				  // Print debug prefilter. Also prints unique ids associated with

				  // nodes of the prefilter of the regexp.

				  void PrintPrefilter(int regexpid);

				  // Each unique node has a corresponding Entry that helps in

				  // passing the matching trigger information along the tree.

				  struct Entry {

				   public:

				    // How many children should match before this node triggers the

				    // parent. For an atom and an OR node, this is 1 and for an AND

				    // node, it is the number of unique children.

				    int propagate_up_at_count;

				    // When this node is ready to trigger the parent, what are the indices

				    // of the parent nodes to trigger. The reason there may be more than

				    // one is because of sharing. For example (abc | def) and (xyz | def)

				    // are two different nodes, but they share the atom 'def'. So when

				    // 'def' matches, it triggers two parents, corresponding to the two

				    // different OR nodes.

				    StdIntMap* parents;

				    // When this node is ready to trigger the parent, what are the

				    // regexps that are triggered.

				    vector<int> regexps;

				  };

				 private:

				  // This function assigns unique ids to various parts of the

				  // prefilter, by checking whether these nodes are already in the

				  // PrefilterTree.

				  void AssignUniqueIds(vector<string>* atom_vec);

				  // Given the matching atoms, find the regexps to be triggered.

				  void PropagateMatch(const vector<int>& atom_ids,

				                      IntMap* regexps) const;

				  // Returns the prefilter node that has the same NodeString as this

				  // node. For the canonical node, returns node. Returns NULL if no node

				  // with that NodeString has been registered yet.

				  Prefilter* CanonicalNode(Prefilter* node);

				  // A string that uniquely identifies the node. Assumes that the

				  // children of node have already been assigned unique ids.

				  string NodeString(Prefilter* node) const;

				  // Recursively constructs a readable prefilter string.

				  string DebugNodeString(Prefilter* node) const;

				  // Used for debugging.

				  void PrintDebugInfo();

				  // These are all the nodes formed by Compile. Essentially, there is

				  // one node for each unique atom and each unique AND/OR node.

				  vector<Entry> entries_;

				  // Map node string to canonical Prefilter node.

				  map<string, Prefilter*> node_map_;

				  // Indices of regexps that always pass through the filter (since we

				  // found no required literals in these regexps).

				  vector<int> unfiltered_;

				  // Vector of Prefilter for all regexps; NULL for unfiltered regexps.

				  vector<Prefilter*> prefilter_vec_;

				  // Atom index in returned strings to entry id mapping.

				  vector<int> atom_index_to_id_;

				  // Has the prefilter tree been compiled.

				  bool compiled_;

				  DISALLOW_COPY_AND_ASSIGN(PrefilterTree);

				};

				}  // namespace re2

				#endif  // RE2_PREFILTER_TREE_H_

									
										343

third_party/re2/re2/prog.cc
									
										vendored
									
				@ -1,343 +0,0 @@

				// Copyright 2007 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Compiled regular expression representation.

				// Tested by compile_test.cc

				#include "util/util.h"

				#include "util/sparse_set.h"

				#include "re2/prog.h"

				#include "re2/stringpiece.h"

				namespace re2 {

				// Constructors per Inst opcode

				// Turns a fresh instruction into kInstAlt with successors |out| and |out1|.
				void Prog::Inst::InitAlt(uint32 out, uint32 out1) {
				  DCHECK_EQ(out_opcode_, 0);
				  out1_ = out1;
				  set_out_opcode(out, kInstAlt);
				}

				// Turns a fresh instruction into kInstByteRange with successor |out|.
				// |lo|, |hi| and |foldcase| are each truncated to their low 8 bits.
				void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32 out) {
				  DCHECK_EQ(out_opcode_, 0);

				  const int low8 = lo & 0xFF;
				  const int high8 = hi & 0xFF;
				  const int fold8 = foldcase & 0xFF;

				  set_out_opcode(out, kInstByteRange);
				  lo_ = low8;
				  hi_ = high8;
				  foldcase_ = fold8;
				}

				// Turns a fresh instruction into kInstCapture for capture index |cap|,
				// with successor |out|.
				void Prog::Inst::InitCapture(int cap, uint32 out) {
				  DCHECK_EQ(out_opcode_, 0);
				  cap_ = cap;
				  set_out_opcode(out, kInstCapture);
				}

				// Turns a fresh instruction into kInstEmptyWidth requiring the flags in
				// |empty|, with successor |out|.
				void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32 out) {
				  DCHECK_EQ(out_opcode_, 0);
				  empty_ = empty;
				  set_out_opcode(out, kInstEmptyWidth);
				}

				// Turns a fresh instruction into kInstMatch carrying match id |id|.
				void Prog::Inst::InitMatch(int32 id) {
				  DCHECK_EQ(out_opcode_, 0);
				  match_id_ = id;
				  set_opcode(kInstMatch);
				}

				// Turns a fresh instruction into kInstNop with successor |out|.
				void Prog::Inst::InitNop(uint32 out) {
				  DCHECK_EQ(out_opcode_, 0);
				  // Store |out| the same way the other Init* helpers do. The previous
				  // set_opcode(kInstNop) call silently ignored the argument, which only
				  // gave the right result when callers passed 0.
				  set_out_opcode(out, kInstNop);
				}

				// Turns a fresh instruction (out_opcode_ still 0) into kInstFail.
				void Prog::Inst::InitFail() {

				  DCHECK_EQ(out_opcode_, 0);

				  set_opcode(kInstFail);

				}

				// Returns a one-line description of this instruction for debugging; the
				// format depends on the opcode.
				string Prog::Inst::Dump() {
				  string text;
				  switch (opcode()) {
				    case kInstAlt:
				      text = StringPrintf("alt -> %d | %d", out(), out1_);
				      break;

				    case kInstAltMatch:
				      text = StringPrintf("altmatch -> %d | %d", out(), out1_);
				      break;

				    case kInstByteRange:
				      text = StringPrintf("byte%s [%02x-%02x] -> %d",
				                          foldcase_ ? "/i" : "",
				                          lo_, hi_, out());
				      break;

				    case kInstCapture:
				      text = StringPrintf("capture %d -> %d", cap_, out());
				      break;

				    case kInstEmptyWidth:
				      text = StringPrintf("emptywidth %#x -> %d",
				                          static_cast<int>(empty_), out());
				      break;

				    case kInstMatch:
				      text = StringPrintf("match! %d", match_id());
				      break;

				    case kInstNop:
				      text = StringPrintf("nop -> %d", out());
				      break;

				    case kInstFail:
				      text = StringPrintf("fail");
				      break;

				    default:
				      text = StringPrintf("opcode %d", static_cast<int>(opcode()));
				      break;
				  }
				  return text;
				}

				// Creates an empty program: all flags false, all counters and sizes zero,
				// and all pointers NULL.
				Prog::Prog()
				  : anchor_start_(false), anchor_end_(false),
				    reversed_(false), did_onepass_(false),
				    start_(0), start_unanchored_(0),
				    size_(0), byte_inst_count_(0), bytemap_range_(0),
				    flags_(0), onepass_statesize_(0),
				    inst_(NULL),
				    dfa_first_(NULL), dfa_longest_(NULL), dfa_mem_(0),
				    delete_dfa_(NULL),
				    unbytemap_(NULL),
				    onepass_nodes_(NULL), onepass_start_(NULL) {
				}

				// Releases the DFAs through delete_dfa_ (when it is set) and frees the
				// one-pass nodes, the instruction array and the unbytemap table.
				Prog::~Prog() {
				  const bool can_delete_dfa = delete_dfa_ ? true : false;
				  if (can_delete_dfa && dfa_first_)
				    delete_dfa_(dfa_first_);
				  if (can_delete_dfa && dfa_longest_)
				    delete_dfa_(dfa_longest_);

				  delete[] onepass_nodes_;
				  delete[] inst_;
				  delete[] unbytemap_;
				}

				typedef SparseSet Workq;

				// Inserts instruction |id| into |q|; id 0 is ignored.
				static inline void AddToQueue(Workq* q, int id) {
				  if (id == 0)
				    return;
				  q->insert(id);
				}

				// Renders every instruction reachable from the ids already in |q| as
				// "<id>. <dump>\n" lines. Successors are added to |q| as instructions are
				// visited, so |q| grows during the scan.
				static string ProgToString(Prog* prog, Workq* q) {
				  string listing;
				  Workq::iterator cur = q->begin();
				  while (cur != q->end()) {
				    const int id = *cur;
				    Prog::Inst* ip = prog->inst(id);
				    StringAppendF(&listing, "%d. %s\n", id, ip->Dump().c_str());

				    AddToQueue(q, ip->out());
				    const bool two_way =
				        ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch;
				    if (two_way)
				      AddToQueue(q, ip->out1());

				    ++cur;
				  }
				  return listing;
				}

				// Returns a listing of the instructions reachable from start_. The byte
				// map section is compiled in but disabled.
				string Prog::Dump() {
				  string bytemap_text;
				  if (false) {  // Debugging
				    StringAppendF(&bytemap_text, "byte map:\n");
				    int range_lo = 0;
				    for (int i = 0; i < bytemap_range_; i++) {
				      StringAppendF(&bytemap_text, "\t%d. [%02x-%02x]\n", i, range_lo, unbytemap_[i]);
				      range_lo = unbytemap_[i] + 1;
				    }
				    StringAppendF(&bytemap_text, "\n");
				  }

				  Workq reachable(size_);
				  AddToQueue(&reachable, start_);
				  return bytemap_text + ProgToString(this, &reachable);
				}

				// Returns a listing of the instructions reachable from start_unanchored_.
				string Prog::DumpUnanchored() {
				  Workq reachable(size_);
				  AddToQueue(&reachable, start_unanchored_);
				  string listing = ProgToString(this, &reachable);
				  return listing;
				}

				static bool IsMatch(Prog*, Prog::Inst*);

				// Peep-hole optimizer.

				void Prog::Optimize() {

				  Workq q(size_);

				  // Eliminate nops.  Most are taken out during compilation

				  // but a few are hard to avoid.

				  q.clear();

				  AddToQueue(&q, start_);

				  for (Workq::iterator i = q.begin(); i != q.end(); ++i) {

				    int id = *i;

				    Inst* ip = inst(id);

				    int j = ip->out();

				    Inst* jp;

				    while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {

				      j = jp->out();

				    }

				    ip->set_out(j);

				    AddToQueue(&q, ip->out());

				    if (ip->opcode() == kInstAlt) {

				      j = ip->out1();

				      while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {

				        j = jp->out();

				      }

				      ip->out1_ = j;

				      AddToQueue(&q, ip->out1());

				    }

				  }

				  // Insert kInstAltMatch instructions

				  // Look for

				  //   ip: Alt -> j | k

				  //	  j: ByteRange [00-FF] -> ip

				  //    k: Match

				  // or the reverse (the above is the greedy one).

				  // Rewrite Alt to AltMatch.

				  q.clear();

				  AddToQueue(&q, start_);

				  for (Workq::iterator i = q.begin(); i != q.end(); ++i) {

				    int id = *i;

				    Inst* ip = inst(id);

				    AddToQueue(&q, ip->out());

				    if (ip->opcode() == kInstAlt)

				      AddToQueue(&q, ip->out1());

				    if (ip->opcode() == kInstAlt) {

				      Inst* j = inst(ip->out());

				      Inst* k = inst(ip->out1());

				      if (j->opcode() == kInstByteRange && j->out() == id &&

				          j->lo() == 0x00 && j->hi() == 0xFF &&

				          IsMatch(this, k)) {

				        ip->set_opcode(kInstAltMatch);

				        continue;

				      }

				      if (IsMatch(this, j) &&

				          k->opcode() == kInstByteRange && k->out() == id &&

				          k->lo() == 0x00 && k->hi() == 0xFF) {

				        ip->set_opcode(kInstAltMatch);

				      }

				    }

				  }

				}

				// Is ip a guaranteed match at end of text, perhaps after some capturing?
				// Follows kInstCapture and kInstNop instructions; returns true on reaching
				// kInstMatch and false on any other opcode.
				static bool IsMatch(Prog* prog, Prog::Inst* ip) {
				  while (true) {
				    switch (ip->opcode()) {
				      case kInstMatch:
				        return true;

				      case kInstCapture:
				      case kInstNop:
				        // Skip over and look at the successor.
				        ip = prog->inst(ip->out());
				        continue;

				      case kInstAlt:
				      case kInstAltMatch:
				      case kInstByteRange:
				      case kInstFail:
				      case kInstEmptyWidth:
				        return false;

				      default:
				        LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
				        return false;
				    }
				  }
				}

				// Returns the kEmpty* flags that hold at position |p| within |text|:
				// begin/end of text and line, and exactly one of word boundary or
				// non-word boundary.
				uint32 Prog::EmptyFlags(const StringPiece& text, const char* p) {
				  const bool at_begin = (p == text.begin());
				  const bool at_end = (p == text.end());
				  int flags = 0;

				  // ^ and \A
				  if (at_begin) {
				    flags |= kEmptyBeginText | kEmptyBeginLine;
				  } else if (p[-1] == '\n') {
				    flags |= kEmptyBeginLine;
				  }

				  // $ and \z
				  if (at_end) {
				    flags |= kEmptyEndText | kEmptyEndLine;
				  } else if (p < text.end() && p[0] == '\n') {
				    flags |= kEmptyEndLine;
				  }

				  // \b and \B
				  bool boundary;
				  if (at_begin && at_end) {
				    boundary = false;  // empty text: no word boundary here
				  } else if (at_begin) {
				    boundary = IsWordChar(p[0]);
				  } else if (at_end) {
				    boundary = IsWordChar(p[-1]);
				  } else {
				    boundary = IsWordChar(p[-1]) != IsWordChar(p[0]);
				  }
				  if (boundary) {
				    flags |= kEmptyWordBoundary;
				  } else {
				    flags |= kEmptyNonWordBoundary;
				  }

				  return flags;
				}

				// Marks the byte range [lo, hi] in byterange_ by setting bit lo-1 (when
				// lo is in 1..255) and bit hi (when hi is in 0..255).
				void Prog::MarkByteRange(int lo, int hi) {
				  DCHECK_GE(lo, 0);
				  DCHECK_GE(hi, 0);
				  DCHECK_LE(lo, 255);
				  DCHECK_LE(hi, 255);
				  DCHECK_LE(lo, hi);

				  const bool lo_has_predecessor = lo > 0 && lo <= 255;
				  if (lo_has_predecessor)
				    byterange_.Set(lo - 1);

				  const bool hi_in_range = hi >= 0 && hi <= 255;
				  if (hi_in_range)
				    byterange_.Set(hi);
				}

				void Prog::ComputeByteMap() {

				  // Fill in bytemap with byte classes for prog_.

				  // Ranges of bytes that are treated as indistinguishable

				  // by the regexp program are mapped to a single byte class.

				  // The vector prog_->byterange() marks the end of each

				  // such range.

				  const Bitmap<256>& v = byterange();

				  COMPILE_ASSERT(8*sizeof(v.Word(0)) == 32, wordsize);

				  uint8 n = 0;

				  uint32 bits = 0;

				  for (int i = 0; i < 256; i++) {

				    if ((i&31) == 0)

				      bits = v.Word(i >> 5);

				    bytemap_[i] = n;

				    n += bits & 1;

				    bits >>= 1;

				  }

				  bytemap_range_ = bytemap_[255] + 1;

				  unbytemap_ = new uint8[bytemap_range_];

				  for (int i = 0; i < 256; i++)

				    unbytemap_[bytemap_[i]] = static_cast<uint8>(i);

				  if (0) {  // For debugging: use trivial byte map.

				    for (int i = 0; i < 256; i++) {

				      bytemap_[i] = static_cast<uint8>(i);

				      unbytemap_[i] = static_cast<uint8>(i);

				    }

				    bytemap_range_ = 256;

				    LOG(INFO) << "Using trivial bytemap.";

				  }

				}

				}  // namespace re2

									
										381

third_party/re2/re2/prog.h
									
										vendored
									
				@ -1,381 +0,0 @@

				// Copyright 2007 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Compiled representation of regular expressions.

				// See regexp.h for the Regexp class, which represents a regular

				// expression symbolically.

				#ifndef RE2_PROG_H__

				#define RE2_PROG_H__

				#include "util/util.h"

				#include "util/sparse_array.h"

				#include "re2/re2.h"

				namespace re2 {

				// Simple fixed-size bitmap.

				template<int Bits>

				class Bitmap {

				 public:

				  Bitmap() { Reset(); }

				  int Size() { return Bits; }

				  void Reset() {

				    for (int i = 0; i < Words; i++)

				      w_[i] = 0;

				  }

				  bool Get(int k) const {

				    return w_[k >> WordLog] & (1<<(k & 31));

				  }

				  void Set(int k) {

				    w_[k >> WordLog] |= 1<<(k & 31);

				  }

				  void Clear(int k) {

				    w_[k >> WordLog] &= ~(1<<(k & 31));

				  }

				  uint32 Word(int i) const {

				    return w_[i];

				  }

				 private:

				  static const int WordLog = 5;

				  static const int Words = (Bits+31)/32;

				  uint32 w_[Words];

				  DISALLOW_COPY_AND_ASSIGN(Bitmap);

				};

				// Opcodes for Inst. Values are consecutive starting at 0.
				enum InstOp {
				  // choose between out_ and out1_
				  kInstAlt = 0,
				  // Alt: out_ is [00-FF] and back, out1_ is match; or vice versa.
				  kInstAltMatch = 1,
				  // next (possible case-folded) byte must be in [lo_, hi_]
				  kInstByteRange = 2,
				  // capturing parenthesis number cap_
				  kInstCapture = 3,
				  // empty-width special (^ $ ...); bit(s) set in empty_
				  kInstEmptyWidth = 4,
				  // found a match!
				  kInstMatch = 5,
				  // no-op; occasionally unavoidable
				  kInstNop = 6,
				  // never match; occasionally unavoidable
				  kInstFail = 7,
				};

				// Bit flags for empty-width specials. Each flag is a distinct bit;
				// kEmptyAllFlags is the union of all six.
				enum EmptyOp {
				  kEmptyBeginLine        = 0x01,  // ^ - beginning of line
				  kEmptyEndLine          = 0x02,  // $ - end of line
				  kEmptyBeginText        = 0x04,  // \A - beginning of text
				  kEmptyEndText          = 0x08,  // \z - end of text
				  kEmptyWordBoundary     = 0x10,  // \b - word boundary
				  kEmptyNonWordBoundary  = 0x20,  // \B - not \b
				  kEmptyAllFlags         = 0x3F,
				};

				class Regexp;

				class DFA;

				struct OneState;

				// Compiled form of regexp program.
				//
				// Holds the instruction array (inst_) together with per-program settings
				// (entry points, anchoring, byte map, DFA memory budget) and declares the
				// execution engines (NFA, DFA, one-pass, bit-state, backtracking) that run
				// the program over text.  Copying is disabled via DISALLOW_COPY_AND_ASSIGN.
				class Prog {
				 public:
				  Prog();
				  ~Prog();

				  // Single instruction in regexp program.
				  class Inst {
				   public:
				    // Zeroes both words, so a fresh Inst has opcode 0 (kInstAlt) and out 0.
				    Inst() : out_opcode_(0), out1_(0) { }

				    // Constructors per opcode
				    void InitAlt(uint32 out, uint32 out1);
				    void InitByteRange(int lo, int hi, int foldcase, uint32 out);
				    void InitCapture(int cap, uint32 out);
				    void InitEmptyWidth(EmptyOp empty, uint32 out);
				    void InitMatch(int id);
				    void InitNop(uint32 out);
				    void InitFail();

				    // Getters
				    // id(): index of this instruction in p's array (offset from p->inst_).
				    int id(Prog* p) { return static_cast<int>(this - p->inst_); }
				    // opcode(): the low 3 bits of out_opcode_.
				    InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
				    // out(): the remaining upper bits of out_opcode_.
				    int out()     { return out_opcode_>>3; }
				    // The getters below read opcode-specific arguments from the union and
				    // DCHECK that the instruction has the opcode that owns that argument.
				    int out1()    { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
				    int cap()       { DCHECK_EQ(opcode(), kInstCapture); return cap_; }
				    int lo()        { DCHECK_EQ(opcode(), kInstByteRange); return lo_; }
				    int hi()        { DCHECK_EQ(opcode(), kInstByteRange); return hi_; }
				    int foldcase()  { DCHECK_EQ(opcode(), kInstByteRange); return foldcase_; }
				    int match_id()  { DCHECK_EQ(opcode(), kInstMatch); return match_id_; }
				    EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; }

				    // For a kInstAltMatch: true when the instruction reached through out()
				    // is a kInstByteRange.
				    bool greedy(Prog *p) {
				      DCHECK_EQ(opcode(), kInstAltMatch);
				      return p->inst(out())->opcode() == kInstByteRange;
				    }

				    // Does this inst (a kInstByteRange) match c?
				    inline bool Matches(int c) {
				      DCHECK_EQ(opcode(), kInstByteRange);
				      // With foldcase_ set, ASCII 'A'..'Z' is lowered before the range test.
				      if (foldcase_ && 'A' <= c && c <= 'Z')
				        c += 'a' - 'A';
				      return lo_ <= c && c <= hi_;
				    }

				    // Returns string representation for debugging.
				    string Dump();

				    // Maximum instruction id.
				    // (Must fit in out_opcode_, and PatchList steals another bit.)
				    static const int kMaxInst = (1<<28) - 1;

				   private:
				    // Replaces the opcode bits while keeping the current out() value.
				    void set_opcode(InstOp opcode) {
				      out_opcode_ = (out()<<3) | opcode;
				    }

				    // Replaces the out bits while keeping the current opcode.
				    void set_out(int out) {
				      out_opcode_ = (out<<3) | opcode();
				    }

				    // Overwrites both the out bits and the opcode in one store.
				    void set_out_opcode(int out, InstOp opcode) {
				      out_opcode_ = (out<<3) | opcode;
				    }

				    uint32 out_opcode_;  // 29 bits of out, 3 (low) bits opcode
				    union {              // additional instruction arguments:
				      uint32 out1_;      // opcode == kInstAlt
				                         //   alternate next instruction
				      int32 cap_;        // opcode == kInstCapture
				                         //   Index of capture register (holds text
				                         //   position recorded by capturing parentheses).
				                         //   For \n (the submatch for the nth parentheses),
				                         //   the left parenthesis captures into register 2*n
				                         //   and the right one captures into register 2*n+1.
				      int32 match_id_;   // opcode == kInstMatch
				                         //   Match ID to identify this match (for re2::Set).
				      struct {           // opcode == kInstByteRange
				        uint8 lo_;       //   byte range is lo_-hi_ inclusive
				        uint8 hi_;       //
				        uint8 foldcase_; //   convert A-Z to a-z before checking range.
				      };
				      EmptyOp empty_;    // opcode == kInstEmptyWidth
				                         //   empty_ is bitwise OR of kEmpty* flags above.
				    };

				    friend class Compiler;
				    friend struct PatchList;
				    friend class Prog;

				    DISALLOW_COPY_AND_ASSIGN(Inst);
				  };

				  // Whether to anchor the search.
				  enum Anchor {
				    kUnanchored,  // match anywhere
				    kAnchored,    // match only starting at beginning of text
				  };

				  // Kind of match to look for (for anchor != kFullMatch)
				  // NOTE(review): kFullMatch is a MatchKind value below, not an Anchor
				  // value, so the parenthetical above looks stale -- confirm the intent.
				  //
				  // kLongestMatch mode finds the overall longest
				  // match but still makes its submatch choices the way
				  // Perl would, not in the way prescribed by POSIX.
				  // The POSIX rules are much more expensive to implement,
				  // and no one has needed them.
				  //
				  // kFullMatch is not strictly necessary -- we could use
				  // kLongestMatch and then check the length of the match -- but
				  // the matching code can run faster if it knows to consider only
				  // full matches.
				  enum MatchKind {
				    kFirstMatch,     // like Perl, PCRE
				    kLongestMatch,   // like egrep or POSIX
				    kFullMatch,      // match only entire text; implies anchor==kAnchored
				    kManyMatch       // for SearchDFA, records set of matches
				  };

				  // Returns a pointer to instruction number id (no bounds check is done here).
				  Inst *inst(int id) { return &inst_[id]; }

				  // Plain accessors for the private fields documented at the end of the class.
				  int start() { return start_; }
				  int start_unanchored() { return start_unanchored_; }
				  void set_start(int start) { start_ = start; }
				  void set_start_unanchored(int start) { start_unanchored_ = start; }
				  int size() { return size_; }
				  bool reversed() { return reversed_; }
				  void set_reversed(bool reversed) { reversed_ = reversed; }
				  int byte_inst_count() { return byte_inst_count_; }
				  const Bitmap<256>& byterange() { return byterange_; }
				  void set_dfa_mem(int64 dfa_mem) { dfa_mem_ = dfa_mem; }
				  int64 dfa_mem() { return dfa_mem_; }
				  int flags() { return flags_; }
				  void set_flags(int flags) { flags_ = flags; }
				  bool anchor_start() { return anchor_start_; }
				  void set_anchor_start(bool b) { anchor_start_ = b; }
				  bool anchor_end() { return anchor_end_; }
				  void set_anchor_end(bool b) { anchor_end_ = b; }
				  int bytemap_range() { return bytemap_range_; }
				  const uint8* bytemap() { return bytemap_; }

				  // Returns string representation of program for debugging.
				  string Dump();
				  string DumpUnanchored();

				  // Record that at some point in the prog, the bytes in the range
				  // lo-hi (inclusive) are treated as different from bytes outside the range.
				  // Tracking this lets the DFA collapse commonly-treated byte ranges
				  // when recording state pointers, greatly reducing its memory footprint.
				  void MarkByteRange(int lo, int hi);

				  // Returns the set of kEmpty flags that are in effect at
				  // position p within context.
				  static uint32 EmptyFlags(const StringPiece& context, const char* p);

				  // Returns whether byte c is a word character: ASCII only.
				  // Used by the implementation of \b and \B.
				  // This is not right for Unicode, but:
				  //   - it's hard to get right in a byte-at-a-time matching world
				  //     (the DFA has only one-byte lookahead).
				  //   - even if the lookahead were possible, the Progs would be huge.
				  // This crude approximation is the same one PCRE uses.
				  static bool IsWordChar(uint8 c) {
				    return ('A' <= c && c <= 'Z') ||
				           ('a' <= c && c <= 'z') ||
				           ('0' <= c && c <= '9') ||
				           c == '_';
				  }

				  // Execution engines.  They all search for the regexp (run the prog)
				  // in text, which is in the larger context (used for ^ $ \b etc).
				  // Anchor and kind control the kind of search.
				  // Returns true if match found, false if not.
				  // If match found, fills match[0..nmatch-1] with submatch info.
				  // match[0] is overall match, match[1] is first set of parens, etc.
				  // If a particular submatch is not matched during the regexp match,
				  // it is set to NULL.
				  //
				  // Matching text == StringPiece(NULL, 0) is treated as any other empty
				  // string, but note that on return, it will not be possible to distinguish
				  // submatches that matched that empty string from submatches that didn't
				  // match anything.  Either way, match[i] == NULL.

				  // Search using NFA: can find submatches but kind of slow.
				  bool SearchNFA(const StringPiece& text, const StringPiece& context,
				                 Anchor anchor, MatchKind kind,
				                 StringPiece* match, int nmatch);

				  // Search using DFA: much faster than NFA but only finds
				  // end of match and can use a lot more memory.
				  // Returns whether a match was found.
				  // If the DFA runs out of memory, sets *failed to true and returns false.
				  // If matches != NULL and kind == kManyMatch and there is a match,
				  // SearchDFA fills matches with the match IDs of the final matching state.
				  bool SearchDFA(const StringPiece& text, const StringPiece& context,
				                 Anchor anchor, MatchKind kind,
				                 StringPiece* match0, bool* failed,
				                 vector<int>* matches);

				  // Build the entire DFA for the given match kind.  FOR TESTING ONLY.
				  // Usually the DFA is built out incrementally, as needed, which
				  // avoids lots of unnecessary work.  This function is useful only
				  // for testing purposes.  Returns number of states.
				  int BuildEntireDFA(MatchKind kind);

				  // Compute byte map.
				  void ComputeByteMap();

				  // Run peep-hole optimizer on program.
				  void Optimize();

				  // One-pass NFA: only correct if IsOnePass() is true,
				  // but much faster than NFA (competitive with PCRE)
				  // for those expressions.
				  bool IsOnePass();
				  bool SearchOnePass(const StringPiece& text, const StringPiece& context,
				                     Anchor anchor, MatchKind kind,
				                     StringPiece* match, int nmatch);

				  // Bit-state backtracking.  Fast on small cases but uses memory
				  // proportional to the product of the program size and the text size.
				  bool SearchBitState(const StringPiece& text, const StringPiece& context,
				                      Anchor anchor, MatchKind kind,
				                      StringPiece* match, int nmatch);

				  static const int kMaxOnePassCapture = 5;  // $0 through $4

				  // Backtracking search: the gold standard against which the other
				  // implementations are checked.  FOR TESTING ONLY.
				  // It allocates a ton of memory to avoid running forever.
				  // It is also recursive, so can't use in production (will overflow stacks).
				  // The name "Unsafe" here is supposed to be a flag that
				  // you should not be using this function.
				  bool UnsafeSearchBacktrack(const StringPiece& text,
				                             const StringPiece& context,
				                             Anchor anchor, MatchKind kind,
				                             StringPiece* match, int nmatch);

				  // Computes range for any strings matching regexp. The min and max can in
				  // some cases be arbitrarily precise, so the caller gets to specify the
				  // maximum desired length of string returned.
				  //
				  // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any
				  // string s that is an anchored match for this regexp satisfies
				  //   min <= s && s <= max.
				  //
				  // Note that PossibleMatchRange() will only consider the first copy of an
				  // infinitely repeated element (i.e., any regexp element followed by a '*' or
				  // '+' operator). Regexps with "{N}" constructions are not affected, as those
				  // do not compile down to infinite repetitions.
				  //
				  // Returns true on success, false on error.
				  bool PossibleMatchRange(string* min, string* max, int maxlen);

				  // EXPERIMENTAL! SUBJECT TO CHANGE!
				  // Outputs the program fanout into the given sparse array.
				  void Fanout(SparseArray<int>* fanout);

				  // Compiles a collection of regexps to Prog.  Each regexp will have
				  // its own Match instruction recording the index in the vector.
				  static Prog* CompileSet(const RE2::Options& options, RE2::Anchor anchor,
				                          Regexp* re);

				 private:
				  friend class Compiler;

				  // Returns the DFA for the given match kind.
				  // NOTE(review): presumably creates and caches it in dfa_first_ /
				  // dfa_longest_ on first use -- confirm against the definition.
				  DFA* GetDFA(MatchKind kind);

				  bool anchor_start_;       // regexp has explicit start anchor
				  bool anchor_end_;         // regexp has explicit end anchor
				  bool reversed_;           // whether program runs backward over input
				  bool did_onepass_;        // has IsOnePass been called?

				  int start_;               // entry point for program
				  int start_unanchored_;    // unanchored entry point for program
				  int size_;                // number of instructions
				  int byte_inst_count_;     // number of kInstByteRange instructions
				  int bytemap_range_;       // bytemap_[x] < bytemap_range_
				  int flags_;               // regexp parse flags
				  int onepass_statesize_;   // byte size of each OneState* node

				  Inst* inst_;              // pointer to instruction array

				  // NOTE(review): the cached DFA pointers are declared volatile in addition
				  // to being guarded by dfa_mutex_; volatile by itself does not provide
				  // inter-thread synchronization -- confirm how they are read elsewhere.
				  Mutex dfa_mutex_;    // Protects dfa_first_, dfa_longest_
				  DFA* volatile dfa_first_;     // DFA cached for kFirstMatch
				  DFA* volatile dfa_longest_;   // DFA cached for kLongestMatch and kFullMatch
				  int64 dfa_mem_;      // Maximum memory for DFAs.
				  // NOTE(review): undocumented in the original; presumably the function used
				  // to free the cached DFAs -- confirm where it is assigned.
				  void (*delete_dfa_)(DFA* dfa);

				  Bitmap<256> byterange_;    // byterange.Get(x) true if x ends a
				                             // commonly-treated byte range.
				  uint8 bytemap_[256];       // map from input bytes to byte classes
				  uint8 *unbytemap_;         // bytemap_[unbytemap_[x]] == x

				  uint8* onepass_nodes_;     // data for OnePass nodes
				  OneState* onepass_start_;  // start node for OnePass program

				  DISALLOW_COPY_AND_ASSIGN(Prog);
				};

				}  // namespace re2

				#endif  // RE2_PROG_H__

1239

third_party/re2/re2/re2.cc vendored

File diff suppressed because it is too large Load Diff

									
										882

third_party/re2/re2/re2.h
									
										vendored
									
				@ -1,882 +0,0 @@

				// Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				#ifndef RE2_RE2_H

				#define RE2_RE2_H

				// C++ interface to the re2 regular-expression library.

				// RE2 supports Perl-style regular expressions (with extensions like

				// \d, \w, \s, ...).

				//

				// -----------------------------------------------------------------------

				// REGEXP SYNTAX:

				//

				// This module uses the re2 library and hence supports

				// its syntax for regular expressions, which is similar to Perl's with

				// some of the more complicated things thrown away.  In particular,

				// backreferences and generalized assertions are not available, nor is \Z.

				//

				// See https://github.com/google/re2/wiki/Syntax for the syntax

				// supported by RE2, and a comparison with PCRE and PERL regexps.

				//

				// For those not familiar with Perl's regular expressions,

				// here are some examples of the most commonly used extensions:

				//

				//   "hello (\\w+) world"  -- \w matches a "word" character

				//   "version (\\d+)"      -- \d matches a digit

				//   "hello\\s+world"      -- \s matches any whitespace character

				//   "\\b(\\w+)\\b"        -- \b matches the empty string at a word boundary

				//   "(?i)hello"           -- (?i) turns on case-insensitive matching

				//   "/\\*(.*?)\\*/"       -- .*? matches . minimum no. of times possible

				//

				// -----------------------------------------------------------------------

				// MATCHING INTERFACE:

				//

				// The "FullMatch" operation checks that supplied text matches a

				// supplied pattern exactly.

				//

				// Example: successful match

				//    CHECK(RE2::FullMatch("hello", "h.*o"));

				//

				// Example: unsuccessful match (requires full match):

				//    CHECK(!RE2::FullMatch("hello", "e"));

				//

				// -----------------------------------------------------------------------

				// UTF-8 AND THE MATCHING INTERFACE:

				//

				// By default, the pattern and input text are interpreted as UTF-8.

				// The RE2::Latin1 option causes them to be interpreted as Latin-1.

				//

				// Example:

				//    CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern)));

				//    CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));

				//

				// -----------------------------------------------------------------------

				// MATCHING WITH SUB-STRING EXTRACTION:

				//

				// You can supply extra pointer arguments to extract matched subpieces.

				//

				// Example: extracts "ruby" into "s" and 1234 into "i"

				//    int i;

				//    string s;

				//    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));

				//

				// Example: fails because string cannot be stored in integer

				//    CHECK(!RE2::FullMatch("ruby", "(.*)", &i));

				//

				// Example: fails because there aren't enough sub-patterns:

				//    CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s));

				//

				// Example: does not try to extract any extra sub-patterns

				//    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));

				//

				// Example: does not try to extract into NULL

				//    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));

				//

				// Example: integer overflow causes failure

				//    CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));

				//

				// NOTE(rsc): Asking for substrings slows successful matches quite a bit.

				// This may get a little faster in the future, but right now is slower

				// than PCRE.  On the other hand, failed matches run *very* fast (faster

				// than PCRE), as do matches without substring extraction.

				//

				// -----------------------------------------------------------------------

				// PARTIAL MATCHES

				//

				// You can use the "PartialMatch" operation when you want the pattern

				// to match any substring of the text.

				//

				// Example: simple search for a string:

				//      CHECK(RE2::PartialMatch("hello", "ell"));

				//

				// Example: find first number in a string

				//      int number;

				//      CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number));

				//      CHECK_EQ(number, 100);

				//

				// -----------------------------------------------------------------------

				// PRE-COMPILED REGULAR EXPRESSIONS

				//

				// RE2 makes it easy to use any string as a regular expression, without

				// requiring a separate compilation step.

				//

				// If speed is of the essence, you can create a pre-compiled "RE2"

				// object from the pattern and use it multiple times.  If you do so,

				// you can typically parse text faster than with sscanf.

				//

				// Example: precompile pattern for faster matching:

				//    RE2 pattern("h.*o");

				//    while (ReadLine(&str)) {

				//      if (RE2::FullMatch(str, pattern)) ...;

				//    }

				//

				// -----------------------------------------------------------------------

				// SCANNING TEXT INCREMENTALLY

				//

				// The "Consume" operation may be useful if you want to repeatedly

				// match regular expressions at the front of a string and skip over

				// them as they match.  This requires use of the "StringPiece" type,

				// which represents a sub-range of a real string.

				//

				// Example: read lines of the form "var = value" from a string.

				//      string contents = ...;          // Fill string somehow

				//      StringPiece input(contents);    // Wrap a StringPiece around it

				//

				//      string var;

				//      int value;

				//      while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {

				//        ...;

				//      }

				//

				// Each successful call to "Consume" will set "var/value", and also

				// advance "input" so it points past the matched text.  Note that if the

				// regular expression matches an empty string, input will advance

				// by 0 bytes.  If the regular expression being used might match

				// an empty string, the loop body must check for this case and either

				// advance the string or break out of the loop.

				//

				// The "FindAndConsume" operation is similar to "Consume" but does not

				// anchor your match at the beginning of the string.  For example, you

				// could extract all words from a string by repeatedly calling

				//     RE2::FindAndConsume(&input, "(\\w+)", &word)

				//

				// -----------------------------------------------------------------------

				// USING VARIABLE NUMBER OF ARGUMENTS

				//

				// The above operations require you to know the number of arguments

				// when you write the code.  This is not always possible or easy (for

				// example, the regular expression may be calculated at run time).

				// You can use the "N" version of the operations when the number of

				// match arguments is determined at run time.

				//

				// Example:

				//   const RE2::Arg* args[10];

				//   int n;

				//   // ... populate args with pointers to RE2::Arg values ...

				//   // ... set n to the number of RE2::Arg objects ...

				//   bool match = RE2::FullMatchN(input, pattern, args, n);

				//

				// The last statement is equivalent to

				//

				//   bool match = RE2::FullMatch(input, pattern,

				//                               *args[0], *args[1], ..., *args[n - 1]);

				//

				// -----------------------------------------------------------------------

				// PARSING HEX/OCTAL/C-RADIX NUMBERS

				//

				// By default, if you pass a pointer to a numeric value, the

				// corresponding text is interpreted as a base-10 number.  You can

				// instead wrap the pointer with a call to one of the operators Hex(),

				// Octal(), or CRadix() to interpret the text in another base.  The

				// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)

				// prefixes, but defaults to base-10.

				//

				// Example:

				//   int a, b, c, d;

				//   CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",

				//         RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d)));

				// will leave 64 in a, b, c, and d.

				#include <stdint.h>

				#include <map>

				#include <string>

				#include "re2/stringpiece.h"

				#include "re2/variadic_function.h"

				#ifndef RE2_HAVE_LONGLONG

				#define RE2_HAVE_LONGLONG 1

				#endif

				namespace re2 {

				using std::string;

				using std::map;

				class Mutex;

				class Prog;

				class Regexp;

				// Tag type meant to be used only as a constructor argument for objects with
				// static storage class.  Passing LINKER_INITIALIZED tells the constructor to
				// leave the object's state untouched, and tells the reader that declaring a
				// static instance of the class is legal.
				//
				// Static variables with a constructor or destructor are normally unsafe,
				// because the order in which those run is undefined.  A static instance is
				// nevertheless fine when BOTH of the following hold:
				//   * filling the object with zeroes (which the loader does for static
				//     variables) is a valid way to initialize the type, and
				//   * the type's destructor does nothing to the storage.
				// Such a type declares its static-initialization constructor as
				//       explicit MyClass(LinkerInitialized x) {}
				// and a static instance is then created with
				//       static MyClass my_variable_name(LINKER_INITIALIZED);
				enum LinkerInitialized {
				  LINKER_INITIALIZED = 0
				};

				// Interface for regular expression matching.  Also corresponds to a

				// pre-compiled regular expression.  An "RE2" object is safe for

				// concurrent use by multiple threads.

				class RE2 {

				 public:

				  // We convert user-passed pointers into special Arg objects

				  class Arg;

				  class Options;

				  // Defined in set.h.

				  class Set;

				  // Reason an RE2 could not be constructed; NoError means it was created
				  // properly (see ok()).
				  enum ErrorCode {
				    NoError = 0,

				    // Unexpected error
				    ErrorInternal = 1,

				    // Parse errors
				    ErrorBadEscape = 2,           // bad escape sequence
				    ErrorBadCharClass = 3,        // bad character class
				    ErrorBadCharRange = 4,        // bad character class range
				    ErrorMissingBracket = 5,      // missing closing ]
				    ErrorMissingParen = 6,        // missing closing )
				    ErrorTrailingBackslash = 7,   // trailing \ at end of regexp
				    ErrorRepeatArgument = 8,      // repeat argument missing, e.g. "*"
				    ErrorRepeatSize = 9,          // bad repetition argument
				    ErrorRepeatOp = 10,           // bad repetition operator
				    ErrorBadPerlOp = 11,          // bad perl operator
				    ErrorBadUTF8 = 12,            // invalid UTF-8 in regexp
				    ErrorBadNamedCapture = 13,    // bad named capture group
				    ErrorPatternTooLarge = 14     // pattern too large (compile failed)
				  };

				  // Predefined common option sets.
				  // For anything more complicated, instantiate an Options object (possibly
				  // passing one of these values to the Options constructor), adjust its
				  // settings, and hand that Options object to the RE2 constructor.
				  enum CannedOptions {
				    // No special behavior requested.
				    DefaultOptions = 0,
				    // Treat input as Latin-1 (default UTF-8).
				    Latin1 = 1,
				    // POSIX syntax, leftmost-longest match.
				    POSIX = 2,
				    // Do not log about regexp parse errors.
				    Quiet = 3
				  };

				  // Need to have the const char* and const string& forms for implicit

				  // conversions when passing string literals to FullMatch and PartialMatch.

				  // Otherwise the StringPiece form would be sufficient.

				#ifndef SWIG

				  RE2(const char* pattern);

				  RE2(const string& pattern);

				#endif

				  RE2(const StringPiece& pattern);

				  RE2(const StringPiece& pattern, const Options& option);

				  ~RE2();

				  // True when this RE2 was created properly, i.e. error_code() is NoError.
				  bool ok() const {
				    return error_code() == NoError;
				  }

				  // Returns the string this RE2 was specified with.  For example:
				  //   RE2 re("ab*c?d+");
				  //   re.pattern();    // "ab*c?d+"
				  const string& pattern() const {
				    return pattern_;
				  }

				  // If RE2 could not be created properly, returns an error string.

				  // Else returns the empty string.

				  const string& error() const { return *error_; }

				  // Returns an error code when the RE2 could not be created properly,
				  // and RE2::NoError (== 0) otherwise.
				  ErrorCode error_code() const {
				    return error_code_;
				  }

				  // Returns the offending portion of the regexp when the RE2 could not be
				  // created properly.
				  const string& error_arg() const {
				    return error_arg_;
				  }

				  // Returns the program size, a very approximate measure of a regexp's "cost".

				  // Larger numbers are more expensive than smaller numbers.

				  int ProgramSize() const;

				  // EXPERIMENTAL! SUBJECT TO CHANGE!

				  // Outputs the program fanout as a histogram bucketed by powers of 2.

				  // Returns the number of the largest non-empty bucket.

				  int ProgramFanout(map<int, int>* histogram) const;

				  // Exposes the underlying Regexp; not for general use.
				  // entire_regexp_ is what gets returned, so callers never have to know
				  // about prefix_ and prefix_foldcase_.
				  re2::Regexp* Regexp() const {
				    return entire_regexp_;
				  }

				  /***** The useful part: the matching interface *****/

				  // Matches "text" against "pattern".  If pointer arguments are

				  // supplied, copies matched sub-patterns into them.

				  //

				  // You can pass in a "const char*" or a "string" for "text".

				  // You can pass in a "const char*" or a "string" or a "RE2" for "pattern".

				  //

				  // The provided pointer arguments can be pointers to any scalar numeric

				  // type, or one of:

				  //    string          (matched piece is copied to string)

				  //    StringPiece     (StringPiece is mutated to point to matched piece)

				  //    T               (where "bool T::ParseFrom(const char*, int)" exists)

				  //    (void*)NULL     (the corresponding matched sub-pattern is not copied)

				  //

				  // Returns true iff all of the following conditions are satisfied:

				  //   a. "text" matches "pattern" exactly

				  //   b. The number of matched sub-patterns is >= number of supplied pointers

				  //   c. The "i"th argument has a suitable type for holding the

				  //      string captured as the "i"th sub-pattern.  If you pass in

				  //      NULL for the "i"th argument, or pass fewer arguments than

				  //      number of sub-patterns, "i"th captured sub-pattern is

				  //      ignored.

				  //

				  // CAVEAT: An optional sub-pattern that does not exist in the

				  // matched string is assigned the empty string.  Therefore, the

				  // following will return false (because the empty string is not a

				  // valid number):

				  //    int number;

				  //    RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number);

				  static bool FullMatchN(const StringPiece& text, const RE2& re,

				                         const Arg* const args[], int argc);

				  static const VariadicFunction2<

				      bool, const StringPiece&, const RE2&, Arg, RE2::FullMatchN> FullMatch;

				  // Exactly like FullMatch(), except that "pattern" is allowed to match

				  // a substring of "text".

				  static bool PartialMatchN(const StringPiece& text, const RE2& re, // 3..16 args

				                            const Arg* const args[], int argc);

				  static const VariadicFunction2<

				      bool, const StringPiece&, const RE2&, Arg, RE2::PartialMatchN> PartialMatch;

				  // Like FullMatch() and PartialMatch(), except that pattern has to

				  // match a prefix of "text", and "input" is advanced past the matched

				  // text.  Note: "input" is modified iff this routine returns true.

				  static bool ConsumeN(StringPiece* input, const RE2& pattern, // 3..16 args

				                       const Arg* const args[], int argc);

				  static const VariadicFunction2<

				      bool, StringPiece*, const RE2&, Arg, RE2::ConsumeN> Consume;

				  // Like Consume(..), but does not anchor the match at the beginning of the

				  // string.  That is, "pattern" need not start its match at the beginning of

				  // "input".  For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next

				  // word in "s" and stores it in "word".

				  static bool FindAndConsumeN(StringPiece* input, const RE2& pattern,

				                             const Arg* const args[], int argc);

				  static const VariadicFunction2<

				      bool, StringPiece*, const RE2&, Arg, RE2::FindAndConsumeN> FindAndConsume;

				  // Replace the first match of "pattern" in "str" with "rewrite".

				  // Within "rewrite", backslash-escaped digits (\1 to \9) can be

				  // used to insert text matching corresponding parenthesized group

				  // from the pattern.  \0 in "rewrite" refers to the entire matching

				  // text.  E.g.,

				  //

				  //   string s = "yabba dabba doo";

				  //   CHECK(RE2::Replace(&s, "b+", "d"));

				  //

				  // will leave "s" containing "yada dabba doo"

				  //

				  // Returns true if the pattern matches and a replacement occurs,

				  // false otherwise.

				  static bool Replace(string *str,

				                      const RE2& pattern,

				                      const StringPiece& rewrite);

				  // Like Replace(), except replaces successive non-overlapping occurrences

				  // of the pattern in the string with the rewrite. E.g.

				  //

				  //   string s = "yabba dabba doo";

				  //   CHECK(RE2::GlobalReplace(&s, "b+", "d"));

				  //

				  // will leave "s" containing "yada dada doo"

				  // Replacements are not subject to re-matching.

				  //

				  // Because GlobalReplace only replaces non-overlapping matches,

				  // replacing "ana" within "banana" makes only one replacement, not two.

				  //

				  // Returns the number of replacements made.

				  static int GlobalReplace(string *str,

				                           const RE2& pattern,

				                           const StringPiece& rewrite);

				  // Like Replace, except that if the pattern matches, "rewrite"

				  // is copied into "out" with substitutions.  The non-matching

				  // portions of "text" are ignored.

				  //

				  // Returns true iff a match occurred and the extraction happened

				  // successfully;  if no match occurs, the string is left unaffected.

				  //

				  // REQUIRES: "text" must not alias any part of "*out".

				  static bool Extract(const StringPiece &text,

				                      const RE2& pattern,

				                      const StringPiece &rewrite,

				                      string *out);

				  // Escapes all potentially meaningful regexp characters in

				  // 'unquoted'.  The returned string, used as a regular expression,

				  // will exactly match the original string.  For example,

				  //           1.5-2.0?

				  // may become:

				  //           1\.5\-2\.0\?

				  static string QuoteMeta(const StringPiece& unquoted);

				  // Computes range for any strings matching regexp. The min and max can in

				  // some cases be arbitrarily precise, so the caller gets to specify the

				  // maximum desired length of string returned.

				  //

				  // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any

				  // string s that is an anchored match for this regexp satisfies

				  //   min <= s && s <= max.

				  //

				  // Note that PossibleMatchRange() will only consider the first copy of an

				  // infinitely repeated element (i.e., any regexp element followed by a '*' or

				  // '+' operator). Regexps with "{N}" constructions are not affected, as those

				  // do not compile down to infinite repetitions.

				  //

				  // Returns true on success, false on error.

				  bool PossibleMatchRange(string* min, string* max, int maxlen) const;

				  // Generic matching interface

				  // Type of match: selects how a search is anchored relative to the text.
				  // Passed as the "anchor" argument of Match().
				  enum Anchor {
				    UNANCHORED,         // No anchoring
				    ANCHOR_START,       // Anchor at start only
				    ANCHOR_BOTH         // Anchor at start and end
				  };

				  // Return the number of capturing subpatterns, or -1 if the

				  // regexp wasn't valid on construction.  The overall match ($0)

				  // does not count: if the regexp is "(a)(b)", returns 2.

				  int NumberOfCapturingGroups() const;

				  // Return a map from names to capturing indices.

				  // The map records the index of the leftmost group

				  // with the given name.

				  // Only valid until the re is deleted.

				  const map<string, int>& NamedCapturingGroups() const;

				  // Return a map from capturing indices to names.

				  // The map has no entries for unnamed groups.

				  // Only valid until the re is deleted.

				  const map<int, string>& CapturingGroupNames() const;

				  // General matching routine.

				  // Match against text starting at offset startpos

				  // and stopping the search at offset endpos.

				  // Returns true if match found, false if not.

				  // On a successful match, fills in match[] (up to nmatch entries)

				  // with information about submatches.

				  // I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true,

				  // setting match[0] = "barbaz", match[1] = NULL, match[2] = "bar",

				  // match[3] = NULL, ..., up to match[nmatch-1] = NULL.

				  //

				  // Don't ask for more match information than you will use:

				  // runs much faster with nmatch == 1 than nmatch > 1, and

				  // runs even faster if nmatch == 0.

				  // Doesn't make sense to use nmatch > 1 + NumberOfCapturingGroups(),

				  // but will be handled correctly.

				  //

				  // Passing text == StringPiece(NULL, 0) will be handled like any other

				  // empty string, but note that on return, it will not be possible to tell

				  // whether submatch i matched the empty string or did not match:

				  // either way, match[i] == NULL.

				  bool Match(const StringPiece& text,

				             int startpos,

				             int endpos,

				             Anchor anchor,

				             StringPiece *match,

				             int nmatch) const;

				  // Check that the given rewrite string is suitable for use with this

				  // regular expression.  It checks that:

				  //   * The regular expression has enough parenthesized subexpressions

				  //     to satisfy all of the \N tokens in rewrite

				  //   * The rewrite string doesn't have any syntax errors.  E.g.,

				  //     '\' followed by anything other than a digit or '\'.

				  // A true return value guarantees that Replace() and Extract() won't

				  // fail because of a bad rewrite string.

				  bool CheckRewriteString(const StringPiece& rewrite, string* error) const;

				  // Returns the maximum submatch needed for the rewrite to be done by

				  // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2.

				  static int MaxSubmatch(const StringPiece& rewrite);

				  // Append the "rewrite" string, with backslash substitutions from "vec",

				  // to string "out".

				  // Returns true on success.  This method can fail because of a malformed

				  // rewrite string.  CheckRewriteString guarantees that the rewrite will

				  // be successful.

				  bool Rewrite(string *out,

				               const StringPiece &rewrite,

				               const StringPiece* vec,

				               int veclen) const;

				  // Constructor options.
				  //
				  // Options is a plain bundle of flags that is handed to the RE2
				  // constructor.  Every flag has a getter/setter pair; Copy() duplicates
				  // all of them because ordinary copying is disabled.
				  class Options {
				   public:
				    // Available options (default value in parentheses):
				    //
				    //   utf8             (true)  text and pattern are UTF-8; otherwise Latin-1
				    //   posix_syntax     (false) restrict regexps to POSIX egrep syntax
				    //   longest_match    (false) search for longest match, not first match
				    //   log_errors       (true)  log syntax and execution errors to ERROR
				    //   max_mem          (see below)  approx. max memory footprint of RE2
				    //   literal          (false) interpret string as literal, not regexp
				    //   never_nl         (false) never match \n, even if it is in regexp
				    //   dot_nl           (false) dot matches everything including new line
				    //   never_capture    (false) parse all parens as non-capturing
				    //   case_sensitive   (true)  match is case-sensitive (regexp can override
				    //                              with (?i) unless in posix_syntax mode)
				    //
				    // These three are consulted only when posix_syntax == true; with
				    // posix_syntax == false the features are always on and cannot be
				    // disabled:
				    //   perl_classes     (false) allow Perl's \d \s \w \D \S \W
				    //   word_boundary    (false) allow Perl's \b \B (word boundary and not)
				    //   one_line         (false) ^ and $ only match beginning and end of text
				    //
				    // max_mem bounds the memory used for the compiled form of the regexp
				    // (the Prog) plus its cached DFA graphs.  Code Search limited both the
				    // number of Prog instructions and the number of DFA states to 10,000.
				    // In RE2 that corresponds to roughly 240 KB per Prog and perhaps
				    // 2.5 MB per DFA (DFA state sizes vary by regexp; RE2 keeps them
				    // smaller than Code Search did).
				    //
				    // Each RE2 has two Progs (forward and reverse), and each Prog can have
				    // two DFAs (first match and longest match), for 4 DFAs in total:
				    //
				    //   forward, first-match    - used for UNANCHORED or ANCHOR_START searches
				    //                               if opt.longest_match() == false
				    //   forward, longest-match  - used for all ANCHOR_BOTH searches,
				    //                               and the other two kinds if
				    //                               opt.longest_match() == true
				    //   reverse, first-match    - never used
				    //   reverse, longest-match  - used as second phase for unanchored searches
				    //
				    // The budget is split statically: two thirds go to the forward Prog
				    // and one third to the reverse Prog.  The forward Prog hands half of
				    // what it has left over to each of its DFAs; the reverse Prog hands
				    // everything to its longest-match DFA.
				    //
				    // A DFA that exhausts its budget flushes its cache and starts over.
				    // If that happens too often, RE2 falls back on the NFA implementation.
				    // For now the default budget is something close to Code Search.
				    static const int kDefaultMaxMem = 8 << 20;

				    enum Encoding {
				      EncodingUTF8 = 1,
				      EncodingLatin1
				    };

				    // Builds an Options object holding the defaults listed above.
				    Options()
				        : encoding_(EncodingUTF8),
				          posix_syntax_(false),
				          longest_match_(false),
				          log_errors_(true),
				          max_mem_(kDefaultMaxMem),
				          literal_(false),
				          never_nl_(false),
				          dot_nl_(false),
				          never_capture_(false),
				          case_sensitive_(true),
				          perl_classes_(false),
				          word_boundary_(false),
				          one_line_(false) {}

				    /*implicit*/ Options(CannedOptions);

				    Encoding encoding() const { return encoding_; }
				    void set_encoding(Encoding encoding) { encoding_ = encoding; }

				    // Legacy boolean view of the encoding.
				    // TODO(rsc): Remove once clients have been converted.
				    bool utf8() const { return encoding_ == EncodingUTF8; }
				    void set_utf8(bool b) { encoding_ = b ? EncodingUTF8 : EncodingLatin1; }

				    bool posix_syntax() const { return posix_syntax_; }
				    void set_posix_syntax(bool b) { posix_syntax_ = b; }

				    bool longest_match() const { return longest_match_; }
				    void set_longest_match(bool b) { longest_match_ = b; }

				    bool log_errors() const { return log_errors_; }
				    void set_log_errors(bool b) { log_errors_ = b; }

				    int64_t max_mem() const { return max_mem_; }
				    void set_max_mem(int64_t m) { max_mem_ = m; }

				    bool literal() const { return literal_; }
				    void set_literal(bool b) { literal_ = b; }

				    bool never_nl() const { return never_nl_; }
				    void set_never_nl(bool b) { never_nl_ = b; }

				    bool dot_nl() const { return dot_nl_; }
				    void set_dot_nl(bool b) { dot_nl_ = b; }

				    bool never_capture() const { return never_capture_; }
				    void set_never_capture(bool b) { never_capture_ = b; }

				    bool case_sensitive() const { return case_sensitive_; }
				    void set_case_sensitive(bool b) { case_sensitive_ = b; }

				    bool perl_classes() const { return perl_classes_; }
				    void set_perl_classes(bool b) { perl_classes_ = b; }

				    bool word_boundary() const { return word_boundary_; }
				    void set_word_boundary(bool b) { word_boundary_ = b; }

				    bool one_line() const { return one_line_; }
				    void set_one_line(bool b) { one_line_ = b; }

				    // Overwrites every option in this object with the value held by src.
				    void Copy(const Options& src) {
				      encoding_ = src.encoding_;
				      posix_syntax_ = src.posix_syntax_;
				      longest_match_ = src.longest_match_;
				      log_errors_ = src.log_errors_;
				      max_mem_ = src.max_mem_;
				      literal_ = src.literal_;
				      never_nl_ = src.never_nl_;
				      dot_nl_ = src.dot_nl_;
				      never_capture_ = src.never_capture_;
				      case_sensitive_ = src.case_sensitive_;
				      perl_classes_ = src.perl_classes_;
				      word_boundary_ = src.word_boundary_;
				      one_line_ = src.one_line_;
				    }

				    int ParseFlags() const;

				   private:
				    Encoding encoding_;
				    bool posix_syntax_;
				    bool longest_match_;
				    bool log_errors_;
				    int64_t max_mem_;
				    bool literal_;
				    bool never_nl_;
				    bool dot_nl_;
				    bool never_capture_;
				    bool case_sensitive_;
				    bool perl_classes_;
				    bool word_boundary_;
				    bool one_line_;

				    // Copying is disabled (declared private, never defined); use Copy().
				    //DISALLOW_COPY_AND_ASSIGN(Options);
				    Options(const Options&);
				    void operator=(const Options&);
				  };

				  // Returns the options set in the constructor.  The result is a
				  // reference to this object's own options_ member.
				  const Options& options() const { return options_; }

				  // Argument converters; see below.

				  static inline Arg CRadix(short* x);

				  static inline Arg CRadix(unsigned short* x);

				  static inline Arg CRadix(int* x);

				  static inline Arg CRadix(unsigned int* x);

				  static inline Arg CRadix(long* x);

				  static inline Arg CRadix(unsigned long* x);

				  #if RE2_HAVE_LONGLONG

				  static inline Arg CRadix(long long* x);

				  static inline Arg CRadix(unsigned long long* x);

				  #endif

				  static inline Arg Hex(short* x);

				  static inline Arg Hex(unsigned short* x);

				  static inline Arg Hex(int* x);

				  static inline Arg Hex(unsigned int* x);

				  static inline Arg Hex(long* x);

				  static inline Arg Hex(unsigned long* x);

				  #if RE2_HAVE_LONGLONG

				  static inline Arg Hex(long long* x);

				  static inline Arg Hex(unsigned long long* x);

				  #endif

				  static inline Arg Octal(short* x);

				  static inline Arg Octal(unsigned short* x);

				  static inline Arg Octal(int* x);

				  static inline Arg Octal(unsigned int* x);

				  static inline Arg Octal(long* x);

				  static inline Arg Octal(unsigned long* x);

				  #if RE2_HAVE_LONGLONG

				  static inline Arg Octal(long long* x);

				  static inline Arg Octal(unsigned long long* x);

				  #endif

				 private:

				  void Init(const StringPiece& pattern, const Options& options);

				  bool DoMatch(const StringPiece& text,

				                   Anchor anchor,

				                   int* consumed,

				                   const Arg* const args[],

				                   int n) const;

				  re2::Prog* ReverseProg() const;

				  mutable Mutex*           mutex_;

				  string                   pattern_;       // string regular expression

				  Options                  options_;       // option flags

				  string        prefix_;           // required prefix (before regexp_)

				  bool          prefix_foldcase_;  // prefix is ASCII case-insensitive

				  re2::Regexp*  entire_regexp_;    // parsed regular expression

				  re2::Regexp*  suffix_regexp_;    // parsed regular expression, prefix removed

				  re2::Prog*    prog_;             // compiled program for regexp

				  mutable re2::Prog* rprog_;       // reverse program for regexp

				  bool                     is_one_pass_;   // can use prog_->SearchOnePass?

				  mutable const string*    error_;         // Error indicator

				                                           // (or points to empty string)

				  mutable ErrorCode        error_code_;    // Error code

				  mutable string           error_arg_;     // Fragment of regexp showing error

				  mutable int              num_captures_;  // Number of capturing groups

				  // Map from capture names to indices

				  mutable const map<string, int>* named_groups_;

				  // Map from capture indices to names

				  mutable const map<int, string>* group_names_;

				  //DISALLOW_COPY_AND_ASSIGN(RE2);

				  RE2(const RE2&);

				  void operator=(const RE2&);

				};

				/***** Implementation details *****/

				// Hex/Octal/Binary?

				// Parser adaptor for user-defined types: any T that provides
				// ParseFrom(const char*, int) can be used as a match destination.
				template <class T>
				class _RE2_MatchObject {
				 public:
				  // Forwards the n bytes at str to dest->ParseFrom() and returns its
				  // result.  A NULL dest counts as success and nothing is parsed.
				  static inline bool Parse(const char* str, int n, void* dest) {
				    return dest == NULL || static_cast<T*>(dest)->ParseFrom(str, n);
				  }
				};

				// RE2::Arg pairs an untyped destination pointer with the parser function
				// that knows how to fill it.  Parse() hands both to the parser together
				// with the text to convert.  The constructors below choose the parser
				// from the static type of the destination pointer.
				class RE2::Arg {
				 public:
				  // Empty constructor so we can declare arrays of RE2::Arg
				  Arg();

				  // Constructor specially designed for NULL arguments
				  Arg(void*);

				  // Signature shared by all parsers: convert the n bytes at str and
				  // store the result through dest; return true on success.
				  typedef bool (*Parser)(const char* str, int n, void* dest);

				// Type-specific parsers.  For each built-in type this declares a
				// constructor that uses the default parser for that type and one that
				// accepts a caller-supplied parser.  The definition must not end with a
				// line continuation, or it would absorb whatever line follows it.
				#define MAKE_PARSER(type, name) \
				  Arg(type* p) : arg_(p), parser_(name) { } \
				  Arg(type* p, Parser parser) : arg_(p), parser_(parser) { }

				  MAKE_PARSER(char,               parse_char)
				  MAKE_PARSER(signed char,        parse_char)
				  MAKE_PARSER(unsigned char,      parse_uchar)
				  MAKE_PARSER(short,              parse_short)
				  MAKE_PARSER(unsigned short,     parse_ushort)
				  MAKE_PARSER(int,                parse_int)
				  MAKE_PARSER(unsigned int,       parse_uint)
				  MAKE_PARSER(long,               parse_long)
				  MAKE_PARSER(unsigned long,      parse_ulong)
				  #if RE2_HAVE_LONGLONG
				  MAKE_PARSER(long long,          parse_longlong)
				  MAKE_PARSER(unsigned long long, parse_ulonglong)
				  #endif
				  MAKE_PARSER(float,              parse_float)
				  MAKE_PARSER(double,             parse_double)
				  MAKE_PARSER(string,             parse_string)
				  MAKE_PARSER(StringPiece,        parse_stringpiece)

				#undef MAKE_PARSER

				  // Generic constructor templates: any other pointee type is parsed
				  // through _RE2_MatchObject<T>::Parse unless a parser is supplied.
				  template <class T> Arg(T* p)
				      : arg_(p), parser_(_RE2_MatchObject<T>::Parse) { }
				  template <class T> Arg(T* p, Parser parser)
				      : arg_(p), parser_(parser) { }

				  // Parse the data
				  bool Parse(const char* str, int n) const;

				 private:
				  void*         arg_;     // destination handed to parser_
				  Parser        parser_;  // conversion routine for arg_

				  static bool parse_null          (const char* str, int n, void* dest);
				  static bool parse_char          (const char* str, int n, void* dest);
				  static bool parse_uchar         (const char* str, int n, void* dest);
				  static bool parse_float         (const char* str, int n, void* dest);
				  static bool parse_double        (const char* str, int n, void* dest);
				  static bool parse_string        (const char* str, int n, void* dest);
				  static bool parse_stringpiece   (const char* str, int n, void* dest);

				// For each integer type: a private default parser and radix-taking
				// helper, plus public hex/octal/cradix parsers.  The public ones are
				// referenced by RE2::Hex(), RE2::Octal() and RE2::CRadix().
				#define DECLARE_INTEGER_PARSER(name)                                        \
				 private:                                                                   \
				  static bool parse_ ## name(const char* str, int n, void* dest);           \
				  static bool parse_ ## name ## _radix(                                     \
				    const char* str, int n, void* dest, int radix);                         \
				 public:                                                                    \
				  static bool parse_ ## name ## _hex(const char* str, int n, void* dest);   \
				  static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \
				  static bool parse_ ## name ## _cradix(const char* str, int n, void* dest)

				  DECLARE_INTEGER_PARSER(short);
				  DECLARE_INTEGER_PARSER(ushort);
				  DECLARE_INTEGER_PARSER(int);
				  DECLARE_INTEGER_PARSER(uint);
				  DECLARE_INTEGER_PARSER(long);
				  DECLARE_INTEGER_PARSER(ulong);
				  #if RE2_HAVE_LONGLONG
				  DECLARE_INTEGER_PARSER(longlong);
				  DECLARE_INTEGER_PARSER(ulonglong);
				  #endif

				#undef DECLARE_INTEGER_PARSER
				};

				// Default Arg: no destination, paired with the parse_null parser.
				inline RE2::Arg::Arg()
				    : arg_(NULL),
				      parser_(parse_null) {
				}

				// Arg built from an untyped pointer: stores p and uses the parse_null
				// parser.
				inline RE2::Arg::Arg(void* p)
				    : arg_(p),
				      parser_(parse_null) {
				}

				inline bool RE2::Arg::Parse(const char* str, int n) const {

				  return (*parser_)(str, n, arg_);

				}

				// Integer-only helpers for choosing a radix.  For each integer type this
				// defines RE2::Hex(), RE2::Octal() and RE2::CRadix(), each returning an
				// Arg bound to the matching parse_<name>_hex / _octal / _cradix parser.
				#define MAKE_INTEGER_PARSER(type, name)                        \
				  inline RE2::Arg RE2::Hex(type* dest) {                       \
				    return RE2::Arg(dest, RE2::Arg::parse_##name##_hex);       \
				  }                                                            \
				  inline RE2::Arg RE2::Octal(type* dest) {                     \
				    return RE2::Arg(dest, RE2::Arg::parse_##name##_octal);     \
				  }                                                            \
				  inline RE2::Arg RE2::CRadix(type* dest) {                    \
				    return RE2::Arg(dest, RE2::Arg::parse_##name##_cradix);    \
				  }

				MAKE_INTEGER_PARSER(short,              short)
				MAKE_INTEGER_PARSER(unsigned short,     ushort)
				MAKE_INTEGER_PARSER(int,                int)
				MAKE_INTEGER_PARSER(unsigned int,       uint)
				MAKE_INTEGER_PARSER(long,               long)
				MAKE_INTEGER_PARSER(unsigned long,      ulong)
				#if RE2_HAVE_LONGLONG
				MAKE_INTEGER_PARSER(long long,          longlong)
				MAKE_INTEGER_PARSER(unsigned long long, ulonglong)
				#endif

				#undef MAKE_INTEGER_PARSER

				}  // namespace re2

				using re2::RE2;

				#endif /* RE2_RE2_H */

									
										938

third_party/re2/re2/regexp.cc
									
										vendored
									
				@ -1,938 +0,0 @@

				// Copyright 2006 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Regular expression representation.

				// Tested by parse_test.cc

				#include "util/util.h"

				#include "re2/regexp.h"

				#include "re2/stringpiece.h"

				#include "re2/walker-inl.h"

				namespace re2 {

				// Constructor.  Creates a node for operator `op` with one reference, no
				// sub-expressions, and zeroed operator-specific storage.
				Regexp::Regexp(RegexpOp op, ParseFlags parse_flags)
				    : op_(static_cast<uint8>(op)),
				      simple_(false),
				      parse_flags_(static_cast<uint16>(parse_flags)),
				      ref_(1),
				      nsub_(0),
				      down_(NULL) {
				  // No child yet; clear the per-operator payload as well.
				  subone_ = NULL;
				  memset(the_union_, 0, sizeof(the_union_));
				}

				// Destructor.  Private: destroy Regexps with Decref(), not delete.
				// Children must already have been released: calling Decref on them here
				// could recurse arbitrarily deep, so Decref()/Destroy() are required to
				// have handled them before this runs.
				Regexp::~Regexp() {
				  if (nsub_ > 0)
				    LOG(DFATAL) << "Regexp not destroyed.";

				  // Release the operator-specific payload, if this operator has one.
				  if (op_ == kRegexpCapture) {
				    delete name_;
				  } else if (op_ == kRegexpLiteralString) {
				    delete[] runes_;
				  } else if (op_ == kRegexpCharClass) {
				    if (cc_)
				      cc_->Delete();
				    delete ccb_;
				  }
				}

				// Deletes this regexp on the spot when it has no sub-expressions, so no
				// recursion is needed, and reports true.  Otherwise leaves the object
				// untouched and reports false.
				bool Regexp::QuickDestroy() {
				  if (nsub_ != 0)
				    return false;

				  delete this;
				  return true;
				}

				// Overflow storage for reference counts.  Once a Regexp's ref_ field
				// reaches kMaxRef, its real count is kept in this map, keyed by the
				// object's address.  Incref() allocates the map on first use.
				static map<Regexp*, int> *ref_map;
				// Held by Ref(), Incref() and Decref() around every access to ref_map.
				GLOBAL_MUTEX(ref_mutex);

				// Returns the current reference count.  Counts below kMaxRef live in
				// ref_; larger counts are read from the overflow map under ref_mutex
				// (0 if the map has never been created).
				int Regexp::Ref() {
				  if (ref_ < kMaxRef)
				    return ref_;

				  GLOBAL_MUTEX_LOCK(ref_mutex);
				  const int overflow_count = (ref_map != NULL) ? (*ref_map)[this] : 0;
				  GLOBAL_MUTEX_UNLOCK(ref_mutex);
				  return overflow_count;
				}

				// Adds one reference and returns this object for convenience.
				Regexp* Regexp::Incref() {
				  // Common case: the inline counter still has room.
				  if (ref_ < kMaxRef-1) {
				    ref_++;
				    return this;
				  }

				  // The count no longer fits in ref_; keep it in the overflow map.
				  GLOBAL_MUTEX_LOCK(ref_mutex);
				  if (ref_map == NULL)
				    ref_map = new map<Regexp*, int>;

				  if (ref_ != kMaxRef) {
				    // Overflowing right now: seed the map and pin ref_ at kMaxRef.
				    (*ref_map)[this] = kMaxRef;
				    ref_ = kMaxRef;
				  } else {
				    // Already overflowed earlier: bump the stored count.
				    (*ref_map)[this]++;
				  }
				  GLOBAL_MUTEX_UNLOCK(ref_mutex);
				  return this;
				}

				// Drops one reference; destroys the object when the count reaches 0.
				void Regexp::Decref() {
				  if (ref_ != kMaxRef) {
				    // The count lives in ref_ itself.
				    ref_--;
				    if (ref_ == 0)
				      Destroy();
				    return;
				  }

				  // The count lives in the overflow map.
				  GLOBAL_MUTEX_LOCK(ref_mutex);
				  const int remaining = (*ref_map)[this] - 1;
				  if (remaining >= kMaxRef) {
				    (*ref_map)[this] = remaining;
				  } else {
				    // Fits in ref_ again: move it back and drop the map entry.
				    ref_ = static_cast<uint16>(remaining);
				    ref_map->erase(this);
				  }
				  GLOBAL_MUTEX_UNLOCK(ref_mutex);
				}

				// Deletes this object; its reference count has reached 0.
				// Sub-expressions whose count also drops to 0 are destroyed as well.
				void Regexp::Destroy() {
				  // Nodes without sub-expressions are deleted immediately.
				  if (QuickDestroy())
				    return;

				  // Handle recursive Destroy with explicit stack
				  // to avoid arbitrarily deep recursion on process stack [sigh].
				  // The stack is a linked list threaded through each node's down_ field.
				  down_ = NULL;
				  Regexp* stack = this;
				  while (stack != NULL) {
				    // Pop the next node awaiting destruction.
				    Regexp* re = stack;
				    stack = re->down_;
				    if (re->ref_ != 0)
				      LOG(DFATAL) << "Bad reference count " << re->ref_;
				    if (re->nsub_ > 0) {
				      Regexp** subs = re->sub();
				      for (int i = 0; i < re->nsub_; i++) {
				        Regexp* sub = subs[i];
				        if (sub == NULL)
				          continue;
				        // Drop re's reference to the child.  A count kept in the
				        // overflow map has to go through Decref(); otherwise decrement
				        // in place so that destruction stays on the explicit stack.
				        if (sub->ref_ == kMaxRef)
				          sub->Decref();
				        else
				          --sub->ref_;
				        // A dead child that still has children of its own is pushed
				        // for later processing instead of being destroyed recursively.
				        if (sub->ref_ == 0 && !sub->QuickDestroy()) {
				          sub->down_ = stack;
				          stack = sub;
				        }
				      }
				      // Only a multi-element array is freed here; presumably a single
				      // child is stored inline -- confirm against AllocSub().
				      if (re->nsub_ > 1)
				        delete[] subs;
				      // Zero the count so the destructor's "not destroyed" check passes.
				      re->nsub_ = 0;
				    }
				    delete re;
				  }
				}

				// Appends rune r to a literal-string node.  Storage starts at 8 runes
				// and doubles whenever the current length is a power of two >= 8.
				void Regexp::AddRuneToString(Rune r) {
				  DCHECK(op_ == kRegexpLiteralString);

				  const bool is_empty = (nrunes_ == 0);
				  const bool is_full = nrunes_ >= 8 && (nrunes_ & (nrunes_ - 1)) == 0;
				  if (is_empty) {
				    // First rune: allocate the initial buffer.
				    runes_ = new Rune[8];
				  } else if (is_full) {
				    // Buffer exhausted: move the contents into one twice the size.
				    Rune* grown = new Rune[nrunes_ * 2];
				    for (int i = 0; i < nrunes_; ++i)
				      grown[i] = runes_[i];
				    delete[] runes_;
				    runes_ = grown;
				  }

				  runes_[nrunes_] = r;
				  nrunes_++;
				}

				// Returns a new kRegexpHaveMatch node carrying match_id.
				Regexp* Regexp::HaveMatch(int match_id, ParseFlags flags) {
				  Regexp* node = new Regexp(kRegexpHaveMatch, flags);
				  node->match_id_ = match_id;
				  return node;
				}

				// Returns a regexp for sub+.  If sub is already a plus node with the
				// same flags it is returned unchanged rather than wrapped again.
				Regexp* Regexp::Plus(Regexp* sub, ParseFlags flags) {
				  const bool already_plus =
				      sub->op() == kRegexpPlus && sub->parse_flags() == flags;
				  if (already_plus)
				    return sub;

				  Regexp* plus = new Regexp(kRegexpPlus, flags);
				  plus->AllocSub(1);
				  plus->sub()[0] = sub;
				  return plus;
				}

				// Returns a regexp for sub*.  If sub is already a star node with the
				// same flags it is returned unchanged rather than wrapped again.
				Regexp* Regexp::Star(Regexp* sub, ParseFlags flags) {
				  const bool already_star =
				      sub->op() == kRegexpStar && sub->parse_flags() == flags;
				  if (already_star)
				    return sub;

				  Regexp* star = new Regexp(kRegexpStar, flags);
				  star->AllocSub(1);
				  star->sub()[0] = sub;
				  return star;
				}

				// Returns a regexp for sub?.  If sub is already a quest node with the
				// same flags it is returned unchanged rather than wrapped again.
				Regexp* Regexp::Quest(Regexp* sub, ParseFlags flags) {
				  const bool already_quest =
				      sub->op() == kRegexpQuest && sub->parse_flags() == flags;
				  if (already_quest)
				    return sub;

				  Regexp* quest = new Regexp(kRegexpQuest, flags);
				  quest->AllocSub(1);
				  quest->sub()[0] = sub;
				  return quest;
				}

				// Shared builder behind Concat(), Alternate() and AlternateNoFactor().
				// Combines the nsub regexps in sub[] under operator op.  When can_factor
				// is set and op is kRegexpAlternate, the operands are first passed
				// through FactorAlternation() on a private copy of the array.
				Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
				                                  ParseFlags flags, bool can_factor) {
				  // A single operand needs no operator node at all.
				  if (nsub == 1)
				    return sub[0];

				  // No operands: empty alternation matches nothing, empty concatenation
				  // matches the empty string.
				  if (nsub == 0) {
				    const RegexpOp empty_op =
				        (op == kRegexpAlternate) ? kRegexpNoMatch : kRegexpEmptyMatch;
				    return new Regexp(empty_op, flags);
				  }

				  Regexp** subcopy = NULL;
				  if (op == kRegexpAlternate && can_factor) {
				    // Factoring edits the array, so work on a copy and leave the
				    // caller's array alone.
				    subcopy = new Regexp*[nsub];
				    memmove(subcopy, sub, nsub * sizeof sub[0]);
				    sub = subcopy;
				    nsub = FactorAlternation(sub, nsub, flags);
				    if (nsub == 1) {
				      Regexp* only = sub[0];
				      delete[] subcopy;
				      return only;
				    }
				  }

				  Regexp* re = new Regexp(op, flags);
				  if (nsub > kMaxNsub) {
				    // Too many subexpressions to fit in a single Regexp.
				    // Make a two-level tree.  Two levels gets us to 65535^2.
				    const int nbigsub = (nsub+kMaxNsub-1)/kMaxNsub;
				    re->AllocSub(nbigsub);
				    Regexp** groups = re->sub();
				    for (int i = 0; i < nbigsub; i++) {
				      // Every group is full except possibly the last one.
				      const int offset = i*kMaxNsub;
				      const int count = (i == nbigsub - 1) ? nsub - offset : kMaxNsub;
				      groups[i] = ConcatOrAlternate(op, sub+offset, count, flags, false);
				    }
				  } else {
				    re->AllocSub(nsub);
				    Regexp** children = re->sub();
				    for (int i = 0; i < nsub; i++)
				      children[i] = sub[i];
				  }

				  delete[] subcopy;
				  return re;
				}

				// Returns the concatenation of the nsub regexps in sub[].
				Regexp* Regexp::Concat(Regexp** sub, int nsub, ParseFlags flags) {
				  const bool can_factor = false;  // factoring only applies to alternations
				  return ConcatOrAlternate(kRegexpConcat, sub, nsub, flags, can_factor);
				}

				// Returns the alternation of the nsub regexps in sub[], with factoring
				// of the alternatives enabled.
				Regexp* Regexp::Alternate(Regexp** sub, int nsub, ParseFlags flags) {
				  const bool can_factor = true;
				  return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, can_factor);
				}

				// Returns the alternation of the nsub regexps in sub[] exactly as given,
				// without factoring the alternatives.
				Regexp* Regexp::AlternateNoFactor(Regexp** sub, int nsub, ParseFlags flags) {
				  const bool can_factor = false;
				  return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, can_factor);
				}

				// Returns a capture node with index cap wrapping sub.
				Regexp* Regexp::Capture(Regexp* sub, ParseFlags flags, int cap) {
				  Regexp* group = new Regexp(kRegexpCapture, flags);
				  group->AllocSub(1);
				  group->sub()[0] = sub;
				  group->cap_ = cap;
				  return group;
				}

				// Returns a repeat node wrapping sub with the given min and max counts.
				Regexp* Regexp::Repeat(Regexp* sub, ParseFlags flags, int min, int max) {
				  Regexp* rep = new Regexp(kRegexpRepeat, flags);
				  rep->AllocSub(1);
				  rep->sub()[0] = sub;
				  rep->min_ = min;
				  rep->max_ = max;
				  return rep;
				}

				// Returns a literal node holding the single rune `rune`.
				Regexp* Regexp::NewLiteral(Rune rune, ParseFlags flags) {
				  Regexp* lit = new Regexp(kRegexpLiteral, flags);
				  lit->rune_ = rune;
				  return lit;
				}

				// Returns a regexp matching the nrunes runes at `runes` in order.
				// Zero or fewer runes give an empty-match node, exactly one rune gives a
				// plain literal, anything longer gives a literal-string node.
				Regexp* Regexp::LiteralString(Rune* runes, int nrunes, ParseFlags flags) {
				  if (nrunes <= 0)
				    return new Regexp(kRegexpEmptyMatch, flags);
				  if (nrunes == 1)
				    return NewLiteral(runes[0], flags);

				  Regexp* str = new Regexp(kRegexpLiteralString, flags);
				  for (Rune* p = runes; p != runes + nrunes; ++p)
				    str->AddRuneToString(*p);
				  return str;
				}

				// Returns a character-class node that stores the pointer cc.
				Regexp* Regexp::NewCharClass(CharClass* cc, ParseFlags flags) {
				  Regexp* node = new Regexp(kRegexpCharClass, flags);
				  node->cc_ = cc;
				  return node;
				}

				// Exchanges the complete contents of *this and *that in place.
				void Regexp::Swap(Regexp* that) {
				  // A raw byte swap is fine because Regexp is just a struct (no vtable).
				  char scratch[sizeof(Regexp)];
				  memmove(scratch, this, sizeof scratch);
				  memmove(this, that, sizeof scratch);
				  memmove(that, scratch, sizeof scratch);
				}

				// Tests equality of all top-level structure but not subregexps.
				// Returns true when a and b have the same operator and the same
				// operator-specific attributes (rune(s), flags that matter for that
				// operator, repeat bounds, capture index and name, match id, character
				// class contents, or number of operands).  Both arguments must be
				// non-NULL; the children themselves are compared by Regexp::Equal().
				static bool TopEqual(Regexp* a, Regexp* b) {
				  if (a->op() != b->op())
				    return false;

				  switch (a->op()) {
				    // Operators with no attributes: same op means equal.
				    case kRegexpNoMatch:
				    case kRegexpEmptyMatch:
				    case kRegexpAnyChar:
				    case kRegexpAnyByte:
				    case kRegexpBeginLine:
				    case kRegexpEndLine:
				    case kRegexpWordBoundary:
				    case kRegexpNoWordBoundary:
				    case kRegexpBeginText:
				      return true;

				    case kRegexpEndText:
				      // The parse flags remember whether it's \z or (?-m:$),
				      // which matters when testing against PCRE.
				      return ((a->parse_flags() ^ b->parse_flags()) & Regexp::WasDollar) == 0;

				    case kRegexpLiteral:
				      return a->rune() == b->rune() &&
				             ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0;

				    case kRegexpLiteralString:
				      return a->nrunes() == b->nrunes() &&
				             ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0 &&
				             memcmp(a->runes(), b->runes(),
				                    a->nrunes() * sizeof a->runes()[0]) == 0;

				    case kRegexpAlternate:
				    case kRegexpConcat:
				      return a->nsub() == b->nsub();

				    case kRegexpStar:
				    case kRegexpPlus:
				    case kRegexpQuest:
				      return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0;

				    case kRegexpRepeat:
				      return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0 &&
				             a->min() == b->min() &&
				             a->max() == b->max();

				    case kRegexpCapture:
				      if (a->cap() != b->cap())
				        return false;
				      // name() is a pointer to the group's name.  Comparing the
				      // pointers alone would call two groups with the same name
				      // different whenever the names live in separate allocations,
				      // so compare the pointed-to strings when both exist.
				      if (a->name() == NULL || b->name() == NULL) {
				        // At least one group is unnamed: equal only if both are.
				        return a->name() == b->name();
				      }
				      return *a->name() == *b->name();

				    case kRegexpHaveMatch:
				      return a->match_id() == b->match_id();

				    case kRegexpCharClass: {
				      // Same size, same number of ranges, and identical range data.
				      CharClass* acc = a->cc();
				      CharClass* bcc = b->cc();
				      return acc->size() == bcc->size() &&
				             acc->end() - acc->begin() == bcc->end() - bcc->begin() &&
				             memcmp(acc->begin(), bcc->begin(),
				                    (acc->end() - acc->begin()) * sizeof acc->begin()[0]) == 0;
				    }
				  }

				  LOG(DFATAL) << "Unexpected op in Regexp::Equal: " << a->op();
				  return false;
				}

				// Returns true if a and b are structurally equal: every pair of
				// corresponding nodes satisfies TopEqual().  Two NULL pointers are
				// equal; a NULL and a non-NULL pointer are not.  The comparison is
				// iterative and keeps pending work in a vector rather than recursing.
				bool Regexp::Equal(Regexp* a, Regexp* b) {
				  if (a == NULL || b == NULL)
				    return a == b;

				  if (!TopEqual(a, b))
				    return false;

				  // Fast path:
				  // return without allocating vector if there are no subregexps.
				  switch (a->op()) {
				    case kRegexpAlternate:
				    case kRegexpConcat:
				    case kRegexpStar:
				    case kRegexpPlus:
				    case kRegexpQuest:
				    case kRegexpRepeat:
				    case kRegexpCapture:
				      break;

				    default:
				      return true;
				  }

				  // Committed to doing real work.
				  // The stack (vector) has pairs of regexps waiting to
				  // be compared.  The regexps are only equal if
				  // all the pairs end up being equal.
				  // Each pair is stored as two consecutive entries: a's node, then b's.
				  vector<Regexp*> stk;

				  for (;;) {
				    // Invariant: TopEqual(a, b) == true.
				    Regexp* a2;
				    Regexp* b2;
				    switch (a->op()) {
				      default:
				        // No children: nothing more to check for this pair.
				        break;
				      case kRegexpAlternate:
				      case kRegexpConcat:
				        // TopEqual has already checked a->nsub() == b->nsub(), so
				        // indexing b's children with a's count stays in range.
				        for (int i = 0; i < a->nsub(); i++) {
				          a2 = a->sub()[i];
				          b2 = b->sub()[i];
				          if (!TopEqual(a2, b2))
				            return false;
				          stk.push_back(a2);
				          stk.push_back(b2);
				        }
				        break;

				      case kRegexpStar:
				      case kRegexpPlus:
				      case kRegexpQuest:
				      case kRegexpRepeat:
				      case kRegexpCapture:
				        // Single child: descend directly.
				        a2 = a->sub()[0];
				        b2 = b->sub()[0];
				        if (!TopEqual(a2, b2))
				          return false;
				        // Really:
				        //   stk.push_back(a2);
				        //   stk.push_back(b2);
				        //   break;
				        // but faster to assign directly and loop.
				        a = a2;
				        b = b2;
				        continue;
				    }

				    // Current pair is done; fetch the next pending pair, if any.
				    size_t n = stk.size();
				    if (n == 0)
				      break;

				    // Entries are always pushed two at a time.
				    DCHECK_GE(n, 2);
				    a = stk[n-2];
				    b = stk[n-1];
				    stk.resize(n-2);
				  }

				  return true;
				}

				// Human-readable descriptions of the RegexpStatusCode values.  Entry i
				// describes the code whose numeric value is i; RegexpStatus::CodeText
				// indexes this table directly with the code.
				// Keep in sync with enum RegexpStatusCode in regexp.h

				static const char *kErrorStrings[] = {

				  "no error",  // kRegexpSuccess

				  "unexpected error",  // kRegexpInternalError

				  "invalid escape sequence",  // kRegexpBadEscape

				  "invalid character class",  // kRegexpBadCharClass

				  "invalid character class range",  // kRegexpBadCharRange

				  "missing ]",  // kRegexpMissingBracket

				  "missing )",  // kRegexpMissingParen

				  "trailing \\",  // kRegexpTrailingBackslash

				  "no argument for repetition operator",  // kRegexpRepeatArgument

				  "invalid repetition size",  // kRegexpRepeatSize

				  "bad repetition operator",  // kRegexpRepeatOp

				  "invalid perl operator",  // kRegexpBadPerlOp

				  "invalid UTF-8",  // kRegexpBadUTF8

				  "invalid named capture group",  // kRegexpBadNamedCapture

				};

				// Returns the fixed description of code taken from kErrorStrings.
				// A code that falls outside the table is reported using the text
				// for kRegexpInternalError.
				string RegexpStatus::CodeText(enum RegexpStatusCode code) {
				  const bool in_table = code >= 0 && code < arraysize(kErrorStrings);
				  return kErrorStrings[in_table ? code : kRegexpInternalError];
				}

				// Returns the description of code_, followed by ": " and the offending
				// piece of the regexp when an error argument has been recorded.
				string RegexpStatus::Text() const {
				  string text = CodeText(code_);
				  if (!error_arg_.empty()) {
				    text.append(": ");
				    text.append(error_arg_.data(), error_arg_.size());
				  }
				  return text;
				}

				// Copies the error code and the error-argument view from status.
				// Only code_ and error_arg_ are assigned; tmp_ is left untouched, so
				// the copied view refers to whatever storage status's view refers to.
				void RegexpStatus::Copy(const RegexpStatus& status) {
				  error_arg_ = status.error_arg_;
				  code_ = status.code_;
				}

				typedef int Ignored;  // Walker<void> doesn't exist

				// Walker that tallies the kRegexpCapture nodes it visits.  The value
				// threaded through the walk carries no information and is passed
				// along unchanged.
				class NumCapturesWalker : public Regexp::Walker<Ignored> {
				 public:
				  NumCapturesWalker() : ncapture_(0) {}

				  // Number of capture nodes seen so far.
				  int ncapture() { return ncapture_; }

				  virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
				    const bool is_capture = (re->op() == kRegexpCapture);
				    if (is_capture)
				      ++ncapture_;
				    return ignored;
				  }

				  virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
				    // Should never be called: we use Walk not WalkExponential.
				    LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
				    return ignored;
				  }

				 private:
				  int ncapture_;  // running count of capture groups

				  DISALLOW_COPY_AND_ASSIGN(NumCapturesWalker);
				};

				int Regexp::NumCaptures() {

				  NumCapturesWalker w;

				  w.Walk(this, 0);

				  return w.ncapture();

				}

				// Walker class to build map of named capture groups and their indices.
				// The map is allocated lazily, on the first named group encountered,
				// so a regexp without named groups yields a NULL map.
				class NamedCapturesWalker : public Regexp::Walker<Ignored> {
				 public:
				  NamedCapturesWalker() : map_(NULL) {}
				  ~NamedCapturesWalker() { delete map_; }

				  // Hands the map (possibly NULL) to the caller, who becomes
				  // responsible for deleting it.  The walker forgets the map.
				  map<string, int>* TakeMap() {
				    map<string, int>* m = map_;
				    map_ = NULL;
				    return m;
				  }

				  virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
				    // Only named capture groups are of interest.  The op check must
				    // come first: name() is only valid on kRegexpCapture nodes.
				    if (re->op() != kRegexpCapture || re->name() == NULL)
				      return ignored;

				    // Allocate map once we find a name.
				    if (map_ == NULL)
				      map_ = new map<string, int>;

				    // Record first occurrence of each name.
				    // (The rule is that if you have the same name
				    // multiple times, only the leftmost one counts.)
				    // insert() leaves an existing entry untouched, which gives the
				    // first-wins rule with one tree lookup instead of find() + [].
				    map_->insert(map<string, int>::value_type(*re->name(), re->cap()));
				    return ignored;
				  }

				  virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
				    // Should never be called: we use Walk not WalkExponential.
				    LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
				    return ignored;
				  }

				 private:
				  map<string, int>* map_;  // owned until TakeMap() is called

				  DISALLOW_COPY_AND_ASSIGN(NamedCapturesWalker);
				};

				// Builds the name -> index map for this regexp's named capture
				// groups.  The result is whatever the walker's TakeMap() yields,
				// which is NULL when no named group was found.
				map<string, int>* Regexp::NamedCaptures() {
				  NamedCapturesWalker collector;
				  collector.Walk(this, 0);
				  return collector.TakeMap();
				}

				// Walker that collects, for every named capture group it visits, a
				// mapping from the group's index to its name.  The map is created
				// lazily, so it stays NULL when no named group is encountered.
				class CaptureNamesWalker : public Regexp::Walker<Ignored> {
				 public:
				  CaptureNamesWalker() : map_(NULL) {}
				  ~CaptureNamesWalker() { delete map_; }

				  // Releases the map (possibly NULL) to the caller and forgets it.
				  map<int, string>* TakeMap() {
				    map<int, string>* taken = map_;
				    map_ = NULL;
				    return taken;
				  }

				  Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
				    // Skip everything except capture groups that carry a name.
				    if (re->op() != kRegexpCapture || re->name() == NULL)
				      return ignored;

				    // First named group seen: create the map now.
				    if (map_ == NULL)
				      map_ = new map<int, string>;

				    (*map_)[re->cap()] = *re->name();
				    return ignored;
				  }

				  virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
				    // Should never be called: we use Walk not WalkExponential.
				    LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
				    return ignored;
				  }

				 private:
				  map<int, string>* map_;  // owned until TakeMap() is called

				  DISALLOW_COPY_AND_ASSIGN(CaptureNamesWalker);
				};

				// Builds the index -> name map for this regexp's named capture
				// groups.  The result is whatever the walker's TakeMap() yields,
				// which is NULL when no named group was found.
				map<int, string>* Regexp::CaptureNames() {
				  CaptureNamesWalker collector;
				  collector.Walk(this, 0);
				  return collector.TakeMap();
				}

				// Checks whether every match of this regexp has to start at the
				// beginning of the text with a fixed literal prefix.  On success
				// returns true, stores the prefix bytes in *prefix, stores in
				// *foldcase whether the literal carries the FoldCase flag, and stores
				// in *suffix a regexp for whatever follows the prefix.  On failure
				// returns false with *prefix empty, *foldcase false and *suffix NULL.
				bool Regexp::RequiredPrefix(string *prefix, bool *foldcase, Regexp** suffix) {
				  // Put the outputs into their failure state first, so every early
				  // return below leaves them well defined.
				  prefix->clear();
				  *foldcase = false;
				  *suffix = NULL;

				  // The accepted shape is a concatenation of:
				  //   one or more ^ anchors, a literal char or string, the rest.
				  if (op_ != kRegexpConcat)
				    return false;

				  Regexp** subs = this->sub();

				  // Skip the leading begin-text anchors.
				  int pos = 0;
				  for (; pos < nsub_; pos++) {
				    if (subs[pos]->op_ != kRegexpBeginText)
				      break;
				  }
				  // Need at least one anchor and something after the anchors.
				  if (pos == 0 || pos >= nsub_)
				    return false;

				  Regexp* lit = subs[pos];
				  const bool latin1 = (lit->parse_flags() & Latin1) != 0;

				  if (lit->op_ == kRegexpLiteralString) {
				    if (latin1) {
				      // One byte per rune.
				      prefix->resize(lit->nrunes_);
				      for (int k = 0; k < lit->nrunes_; k++)
				        (*prefix)[k] = static_cast<char>(lit->runes_[k]);
				    } else {
				      // Encode as UTF-8 directly into the string: reserve the
				      // worst case, write, then shrink to what was used.
				      prefix->resize(lit->nrunes_ * UTFmax);
				      char* const start = &(*prefix)[0];
				      char* out = start;
				      for (int k = 0; k < lit->nrunes_; k++) {
				        Rune r = lit->runes_[k];
				        if (r < Runeself)
				          *out++ = static_cast<char>(r);
				        else
				          out += runetochar(out, &r);
				      }
				      prefix->resize(out - start);
				    }
				  } else if (lit->op_ == kRegexpLiteral) {
				    if (latin1 || lit->rune_ < Runeself) {
				      prefix->append(1, static_cast<char>(lit->rune_));
				    } else {
				      char buf[UTFmax];
				      prefix->append(buf, runetochar(buf, &lit->rune_));
				    }
				  } else {
				    // Anything other than a literal after the anchors: no prefix.
				    return false;
				  }

				  *foldcase = (lit->parse_flags() & FoldCase) != 0;

				  // Build the regexp for what follows the literal.
				  const int rest = pos + 1;
				  Regexp* tail;
				  if (rest < nsub_) {
				    // Concat consumes one reference per sub, so take them first.
				    for (int k = rest; k < nsub_; k++)
				      subs[k]->Incref();
				    tail = Concat(subs + rest, nsub_ - rest, parse_flags());
				  } else {
				    tail = new Regexp(kRegexpEmptyMatch, parse_flags());
				  }
				  *suffix = tail;
				  return true;
				}

				// A CharClassBuilder keeps its RuneRanges in a balanced binary tree
				// (STL set).  The stored ranges never overlap and never abut.
				// The tree's ordering considers two ranges equivalent whenever
				// they overlap at all, which is what makes it possible to look up
				// the range covering a particular Rune.
				//
				// A new builder is empty: no runes, and no ASCII letters recorded
				// in either the upper-case or the lower-case bitmap.
				CharClassBuilder::CharClassBuilder() {
				  upper_ = 0;
				  lower_ = 0;
				  nrunes_ = 0;
				}

				// Adds the inclusive range [lo, hi] to the class, merging it with any
				// stored ranges it overlaps or touches.  Returns true if the class
				// grew, false if hi < lo or the range was already fully covered.
				bool CharClassBuilder::AddRange(Rune lo, Rune hi) {
				  if (hi < lo)
				    return false;

				  // Keep the ASCII-letter bitmaps current: one bit per letter,
				  // bit 0 standing for 'A' (upper_) or 'a' (lower_).
				  if (lo <= 'z' && hi >= 'A') {
				    const Rune up_lo = max<Rune>(lo, 'A');
				    const Rune up_hi = min<Rune>(hi, 'Z');
				    if (up_lo <= up_hi)
				      upper_ |= ((1 << (up_hi - up_lo + 1)) - 1) << (up_lo - 'A');

				    const Rune low_lo = max<Rune>(lo, 'a');
				    const Rune low_hi = min<Rune>(hi, 'z');
				    if (low_lo <= low_hi)
				      lower_ |= ((1 << (low_hi - low_lo + 1)) - 1) << (low_lo - 'a');
				  }

				  // Nothing changes if one stored range already spans [lo, hi].
				  iterator covering = ranges_.find(RuneRange(lo, lo));
				  if (covering != end() && covering->lo <= lo && hi <= covering->hi)
				    return false;

				  // A stored range containing lo-1 touches or overlaps us on the
				  // left: absorb it into [lo, hi] and remove it from the set.
				  if (lo > 0) {
				    iterator left = ranges_.find(RuneRange(lo-1, lo-1));
				    if (left != end()) {
				      nrunes_ -= left->hi - left->lo + 1;
				      lo = left->lo;
				      if (left->hi > hi)
				        hi = left->hi;
				      ranges_.erase(left);
				    }
				  }

				  // Likewise for a stored range containing hi+1 on the right.
				  if (hi < Runemax) {
				    iterator right = ranges_.find(RuneRange(hi+1, hi+1));
				    if (right != end()) {
				      nrunes_ -= right->hi - right->lo + 1;
				      hi = right->hi;
				      ranges_.erase(right);
				    }
				  }

				  // Whatever still overlaps [lo, hi] must lie entirely inside it:
				  // the set holds no overlapping ranges and both neighbours have
				  // already been absorbed.  Remove those ranges one at a time.
				  for (iterator inner = ranges_.find(RuneRange(lo, hi));
				       inner != end();
				       inner = ranges_.find(RuneRange(lo, hi))) {
				    nrunes_ -= inner->hi - inner->lo + 1;
				    ranges_.erase(inner);
				  }

				  // Store the merged range and account for its runes.
				  ranges_.insert(RuneRange(lo, hi));
				  nrunes_ += hi - lo + 1;
				  return true;
				}

				// Adds every range stored in *cc to this class.
				void CharClassBuilder::AddCharClass(CharClassBuilder *cc) {
				  iterator it = cc->begin();
				  while (it != cc->end()) {
				    AddRange(it->lo, it->hi);
				    ++it;
				  }
				}

				// Reports whether rune r lies inside one of the stored ranges.
				bool CharClassBuilder::Contains(Rune r) {
				  // A one-rune probe range compares equivalent to any stored range
				  // that overlaps it, i.e. any range containing r.
				  const RuneRange probe(r, r);
				  iterator hit = ranges_.find(probe);
				  return hit != end();
				}

				// Reports whether the class treats A-Z exactly as it treats a-z,
				// i.e. the upper-case and lower-case letter bitmaps agree on
				// every bit selected by AlphaMask.
				bool CharClassBuilder::FoldsASCII() {
				  return (upper_ & AlphaMask) == (lower_ & AlphaMask);
				}

				// Returns a newly allocated builder holding the same ranges, letter
				// bitmaps and rune count as this one.
				CharClassBuilder* CharClassBuilder::Copy() {
				  CharClassBuilder* dup = new CharClassBuilder;
				  dup->nrunes_ = nrunes_;
				  dup->upper_ = upper_;
				  dup->lower_ = lower_;
				  iterator it = begin();
				  while (it != end()) {
				    dup->ranges_.insert(RuneRange(it->lo, it->hi));
				    ++it;
				  }
				  return dup;
				}

				// Removes every rune greater than r from the class.  Does nothing
				// when r >= Runemax.
				void CharClassBuilder::RemoveAbove(Rune r) {
				  if (r >= Runemax)
				    return;

				  // Trim the lower-case bitmap: clear it entirely when r is below
				  // 'a', otherwise keep only the bits for 'a'..r.
				  if (r < 'a')
				    lower_ = 0;
				  else if (r < 'z')
				    lower_ &= AlphaMask >> ('z' - r);

				  // Same for the upper-case bitmap.
				  if (r < 'A')
				    upper_ = 0;
				  else if (r < 'Z')
				    upper_ &= AlphaMask >> ('Z' - r);

				  // Take out each stored range that reaches above r.  A range that
				  // starts at or below r is put back truncated to end at r.
				  for (iterator it = ranges_.find(RuneRange(r + 1, Runemax));
				       it != end();
				       it = ranges_.find(RuneRange(r + 1, Runemax))) {
				    const RuneRange old = *it;
				    ranges_.erase(it);
				    nrunes_ -= old.hi - old.lo + 1;
				    if (old.lo <= r) {
				      const RuneRange kept(old.lo, r);
				      ranges_.insert(kept);
				      nrunes_ += kept.hi - kept.lo + 1;
				    }
				  }
				}

				void CharClassBuilder::Negate() {

				  // Build up negation and then copy in.

				  // Could edit ranges in place, but C++ won't let me.

				  vector<RuneRange> v;

				  v.reserve(ranges_.size() + 1);

				  // In negation, first range begins at 0, unless

				  // the current class begins at 0.

				  iterator it = begin();

				  if (it == end()) {

				    v.push_back(RuneRange(0, Runemax));

				  } else {

				    int nextlo = 0;

				    if (it->lo == 0) {

				      nextlo = it->hi + 1;

				      ++it;

				    }

				    for (; it != end(); ++it) {

				      v.push_back(RuneRange(nextlo, it->lo - 1));

				      nextlo = it->hi + 1;

				    }

				    if (nextlo <= Runemax)

				      v.push_back(RuneRange(nextlo, Runemax));

				  }

				  ranges_.clear();

				  for (size_t i = 0; i < v.size(); i++)

				    ranges_.insert(v[i]);

				  upper_ = AlphaMask & ~upper_;

				  lower_ = AlphaMask & ~lower_;

				  nrunes_ = Runemax+1 - nrunes_;

				}

				// A CharClass is a sorted list of ranges.  The header and the range
				// array share a single allocation, which is why the class needs its
				// own allocator (New) and deallocator (Delete).
				//
				// Returns an empty class with room for maxranges ranges.
				CharClass* CharClass::New(int maxranges) {
				  const size_t header_bytes = sizeof(CharClass);
				  const size_t range_bytes = maxranges * sizeof(RuneRange);
				  uint8* block = new uint8[header_bytes + range_bytes];

				  // The header sits at the front of the block; the range array
				  // starts immediately after it.
				  CharClass* cc = reinterpret_cast<CharClass*>(block);
				  cc->ranges_ = reinterpret_cast<RuneRange*>(block + header_bytes);
				  cc->nranges_ = 0;
				  cc->nrunes_ = 0;
				  cc->folds_ascii_ = false;
				  return cc;
				}

				// Frees the object.  The storage came from new uint8[] in New(), so
				// it is handed back through the matching array delete.
				void CharClass::Delete() {
				  delete[] reinterpret_cast<uint8*>(this);
				}

				// Returns a newly allocated class that is the complement of this one
				// over [0, Runemax].  This object is not modified.
				CharClass* CharClass::Negate() {
				  // The complement holds at most one range more than the original.
				  CharClass* neg = CharClass::New(nranges_+1);
				  neg->folds_ascii_ = folds_ascii_;
				  neg->nrunes_ = Runemax + 1 - nrunes_;

				  int count = 0;
				  int nextlo = 0;  // first rune not yet accounted for
				  for (int k = 0; k < nranges_; k++) {
				    const RuneRange& cur = ranges_[k];
				    // A range that begins exactly at nextlo leaves no gap.
				    if (cur.lo != nextlo)
				      neg->ranges_[count++] = RuneRange(nextlo, cur.lo - 1);
				    nextlo = cur.hi + 1;
				  }
				  if (nextlo <= Runemax)
				    neg->ranges_[count++] = RuneRange(nextlo, Runemax);

				  neg->nranges_ = count;
				  return neg;
				}

				// Reports whether rune r lies in one of the ranges, using a binary
				// search over the sorted range array.
				bool CharClass::Contains(Rune r) {
				  const RuneRange* base = ranges_;
				  int remaining = nranges_;
				  while (remaining > 0) {
				    const int mid = remaining / 2;
				    const RuneRange& probe = base[mid];
				    if (probe.hi < r) {
				      // r is above the probe: continue in the upper part.
				      base += mid+1;
				      remaining -= mid+1;
				      continue;
				    }
				    if (r < probe.lo) {
				      // r is below the probe: continue in the lower part.
				      remaining = mid;
				      continue;
				    }
				    // probe.lo <= r && r <= probe.hi
				    return true;
				  }
				  return false;
				}

				// Returns a newly allocated CharClass holding the builder's ranges in
				// iteration order, together with its rune count and ASCII-folding
				// property.
				CharClass* CharClassBuilder::GetCharClass() {
				  const int capacity = static_cast<int>(ranges_.size());
				  CharClass* cc = CharClass::New(capacity);

				  int filled = 0;
				  for (iterator it = begin(); it != end(); ++it, ++filled)
				    cc->ranges_[filled] = *it;
				  cc->nranges_ = filled;
				  DCHECK_LE(filled, capacity);

				  cc->folds_ascii_ = FoldsASCII();
				  cc->nrunes_ = nrunes_;
				  return cc;
				}

				}  // namespace re2

									
										635

third_party/re2/re2/regexp.h
									
										vendored
									
				@ -1,635 +0,0 @@

				// Copyright 2006 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// --- SPONSORED LINK --------------------------------------------------

				// If you want to use this library for regular expression matching,

				// you should use re2/re2.h, which provides a class RE2 that

				// mimics the PCRE interface provided by PCRE's C++ wrappers.

				// This header describes the low-level interface used to implement RE2

				// and may change in backwards-incompatible ways from time to time.

				// In contrast, RE2's interface will not.

				// ---------------------------------------------------------------------

				// Regular expression library: parsing, execution, and manipulation

				// of regular expressions.

				//

				// Any operation that traverses the Regexp structures should be written

				// using Regexp::Walker (see walker-inl.h), not recursively, because deeply nested

				// regular expressions such as x++++++++++++++++++++... might cause recursive

				// traversals to overflow the stack.

				//

				// It is the caller's responsibility to provide appropriate mutual exclusion

				// around manipulation of the regexps.  RE2 does this.

				//

				// PARSING

				//

				// Regexp::Parse parses regular expressions encoded in UTF-8.

				// The default syntax is POSIX extended regular expressions,

				// with the following changes:

				//

				//   1.  Backreferences (optional in POSIX EREs) are not supported.

				//         (Supporting them precludes the use of DFA-based

				//          matching engines.)

				//

				//   2.  Collating elements and collation classes are not supported.

				//         (No one has needed or wanted them.)

				//

				// The exact syntax accepted can be modified by passing flags to

				// Regexp::Parse.  In particular, many of the basic Perl additions

				// are available.  The flags are documented below (search for LikePerl).

				//

				// If parsed with the flag Regexp::Latin1, both the regular expression

				// and the input to the matching routines are assumed to be encoded in

				// Latin-1, not UTF-8.

				//

				// EXECUTION

				//

				// Once Regexp has parsed a regular expression, it provides methods

				// to search text using that regular expression.  These methods are

				// implemented via calling out to other regular expression libraries.

				// (Let's call them the sublibraries.)

				//

				// To call a sublibrary, Regexp does not simply prepare a

				// string version of the regular expression and hand it to the

				// sublibrary.  Instead, Regexp prepares, from its own parsed form, the

				// corresponding internal representation used by the sublibrary.

				// This has the drawback of needing to know the internal representation

				// used by the sublibrary, but it has two important benefits:

				//

				//   1. The syntax and meaning of regular expressions is guaranteed

				//      to be that used by Regexp's parser, not the syntax expected

				//      by the sublibrary.  Regexp might accept a restricted or

				//      expanded syntax for regular expressions as compared with

				//      the sublibrary.  As long as Regexp can translate from its

				//      internal form into the sublibrary's, clients need not know

				//      exactly which sublibrary they are using.

				//

				//   2. The sublibrary parsers are bypassed.  For whatever reason,

				//      sublibrary regular expression parsers often have security

				//      problems.  For example, plan9grep's regular expression parser

				//      has a buffer overflow in its handling of large character

				//      classes, and PCRE's parser has had buffer overflow problems

				//      in the past.  Security-team requires sandboxing of sublibrary

				//      regular expression parsers.  Avoiding the sublibrary parsers

				//      avoids the sandbox.

				//

				// The execution methods we use now are provided by the compiled form,

				// Prog, described in prog.h

				//

				// MANIPULATION

				//

				// Unlike other regular expression libraries, Regexp makes its parsed

				// form accessible to clients, so that client code can analyze the

				// parsed regular expressions.

				#ifndef RE2_REGEXP_H__

				#define RE2_REGEXP_H__

				#include "util/util.h"

				#include "re2/stringpiece.h"

				namespace re2 {

				// Operator stored in each node of a parsed regexp (see Regexp::op()).
				// Numbering starts at 1 and each following enumerator is one larger.
				// Keep in sync with string list kOpcodeNames[] in testing/dump.cc

				enum RegexpOp {

				  // Matches no strings.

				  kRegexpNoMatch = 1,

				  // Matches empty string.

				  kRegexpEmptyMatch,

				  // Matches rune_.

				  kRegexpLiteral,

				  // Matches runes_.

				  kRegexpLiteralString,

				  // Matches concatenation of sub_[0..nsub-1].

				  kRegexpConcat,

				  // Matches union of sub_[0..nsub-1].

				  kRegexpAlternate,

				  // Matches sub_[0] zero or more times.

				  kRegexpStar,

				  // Matches sub_[0] one or more times.

				  kRegexpPlus,

				  // Matches sub_[0] zero or one times.

				  kRegexpQuest,

				  // Matches sub_[0] at least min_ times, at most max_ times.

				  // max_ == -1 means no upper limit.

				  kRegexpRepeat,

				  // Parenthesized (capturing) subexpression.  Index is cap_.

				  // Optionally, capturing name is name_.

				  kRegexpCapture,

				  // Matches any character.

				  kRegexpAnyChar,

				  // Matches any byte [sic].

				  kRegexpAnyByte,

				  // Matches empty string at beginning of line.

				  kRegexpBeginLine,

				  // Matches empty string at end of line.

				  kRegexpEndLine,

				  // Matches word boundary "\b".

				  kRegexpWordBoundary,

				  // Matches not-a-word boundary "\B".

				  kRegexpNoWordBoundary,

				  // Matches empty string at beginning of text.

				  kRegexpBeginText,

				  // Matches empty string at end of text.

				  kRegexpEndText,

				  // Matches character class given by cc_.

				  kRegexpCharClass,

				  // Forces match of entire expression right now,

				  // with match ID match_id_ (used by RE2::Set).

				  kRegexpHaveMatch,

				  // Alias for the last (largest) operator above; update it when
				  // appending a new operator.
				  kMaxRegexpOp = kRegexpHaveMatch,

				};

				// Status codes reported through RegexpStatus.  Numbering starts at 0
				// and is contiguous; RegexpStatus::CodeText uses the numeric value as
				// an index into the kErrorStrings table, so the order here must match
				// the order of that table.
				// Keep in sync with string list in regexp.cc

				enum RegexpStatusCode {

				  // No error

				  kRegexpSuccess = 0,

				  // Unexpected error

				  kRegexpInternalError,

				  // Parse errors

				  kRegexpBadEscape,          // bad escape sequence

				  kRegexpBadCharClass,       // bad character class

				  kRegexpBadCharRange,       // bad character class range

				  kRegexpMissingBracket,     // missing closing ]

				  kRegexpMissingParen,       // missing closing )

				  kRegexpTrailingBackslash,  // at end of regexp

				  kRegexpRepeatArgument,     // repeat argument missing, e.g. "*"

				  kRegexpRepeatSize,         // bad repetition argument

				  kRegexpRepeatOp,           // bad repetition operator

				  kRegexpBadPerlOp,          // bad perl operator

				  kRegexpBadUTF8,            // invalid UTF-8 in regexp

				  kRegexpBadNamedCapture,    // bad named capture

				};

				// Error status for certain operations.
				// Holds a status code, a StringPiece view of the text the error refers
				// to, and an optional heap-allocated string (tmp_) that the object
				// owns and deletes in its destructor.

				class RegexpStatus {

				 public:

				  // Starts out as success, with an empty error argument and no tmp_.
				  RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {}

				  ~RegexpStatus() { delete tmp_; }

				  void set_code(enum RegexpStatusCode code) { code_ = code; }

				  // Stores the view only; the characters it refers to are not copied.
				  void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; }

				  // Takes ownership of tmp, deleting any string held previously.
				  void set_tmp(string* tmp) { delete tmp_; tmp_ = tmp; }

				  enum RegexpStatusCode code() const { return code_; }

				  const StringPiece& error_arg() const { return error_arg_; }

				  bool ok() const { return code() == kRegexpSuccess; }

				  // Copies state from status.
				  // Only code_ and error_arg_ are copied (see regexp.cc); tmp_ is not.

				  void Copy(const RegexpStatus& status);

				  // Returns text equivalent of code, e.g.:

				  //   "invalid character class"

				  static string CodeText(enum RegexpStatusCode code);

				  // Returns text describing error: the code text, followed by ": "
				  // and the error argument when one is set, e.g. (for an error
				  // argument of "z-a"):

				  //   "invalid character class range: z-a"

				  string Text() const;

				 private:

				  enum RegexpStatusCode code_;  // Kind of error

				  StringPiece error_arg_;       // Piece of regexp containing syntax error.

				  string* tmp_;                 // Temporary storage, possibly where error_arg_ is.

				  DISALLOW_COPY_AND_ASSIGN(RegexpStatus);

				};

				// Walkers to implement Simplify.

				class CoalesceWalker;

				class SimplifyWalker;

				// Compiled form; see prog.h

				class Prog;

				// A range of runes with bounds lo and hi.  The code in regexp.cc treats
				// both bounds as inclusive (a range counts hi - lo + 1 runes).
				struct RuneRange {

				  // Default range: both bounds are 0.
				  RuneRange() : lo(0), hi(0) { }

				  RuneRange(int l, int h) : lo(l), hi(h) { }

				  Rune lo;  // first rune in the range

				  Rune hi;  // last rune in the range

				};

				// Strict ordering for RuneRanges: a sorts before b only when a ends
				// before b begins.  Two ranges that share any rune are therefore
				// neither less nor greater, i.e. equivalent, which lets a set lookup
				// with a one-rune range find the stored range covering that rune.
				struct RuneRangeLess {
				  bool operator()(const RuneRange& a, const RuneRange& b) const {
				    const bool a_ends_before_b = b.lo > a.hi;
				    return a_ends_before_b;
				  }
				};

				class CharClassBuilder;

				// Character class stored as an array of RuneRanges plus summary data.
				// The constructor and destructor are private and unimplemented:
				// objects come from the private static New() (reachable by the friend
				// CharClassBuilder and by CharClass itself) and are released with
				// Delete().
				class CharClass {

				 public:

				  // Frees this object; use instead of the delete operator.
				  void Delete();

				  typedef RuneRange* iterator;

				  // Iteration over the stored ranges, in array order.
				  iterator begin() { return ranges_; }

				  iterator end() { return ranges_ + nranges_; }

				  // Number of runes in the class (not the number of ranges).
				  int size() { return nrunes_; }

				  bool empty() { return nrunes_ == 0; }

				  // True when the class holds Runemax+1 runes.
				  bool full() { return nrunes_ == Runemax+1; }

				  bool FoldsASCII() { return folds_ascii_; }

				  // Reports whether r lies in one of the ranges.
				  bool Contains(Rune r);

				  // Returns a newly allocated complement of this class.
				  CharClass* Negate();

				 private:

				  CharClass();  // not implemented

				  ~CharClass();  // not implemented

				  // Allocates an empty class with room for maxranges ranges.
				  static CharClass* New(int maxranges);

				  friend class CharClassBuilder;

				  bool folds_ascii_;   // value reported by FoldsASCII()

				  int nrunes_;         // total number of runes in the class

				  RuneRange *ranges_;  // range array; placed right after the header by New()

				  int nranges_;        // number of entries in ranges_

				  DISALLOW_COPY_AND_ASSIGN(CharClass);

				};

				class Regexp {

				 public:

				  // Flags for parsing.  Can be ORed together.

				  enum ParseFlags {

				    NoParseFlags = 0,

				    FoldCase     = 1<<0,   // Fold case during matching (case-insensitive).

				    Literal      = 1<<1,   // Treat s as literal string instead of a regexp.

				    ClassNL      = 1<<2,   // Allow char classes like [^a-z] and \D and \s

				                           // and [[:space:]] to match newline.

				    DotNL        = 1<<3,   // Allow . to match newline.

				    MatchNL      = ClassNL | DotNL,

				    OneLine      = 1<<4,   // Treat ^ and $ as only matching at beginning and

				                           // end of text, not around embedded newlines.

				                           // (Perl's default)

				    Latin1       = 1<<5,   // Regexp and text are in Latin1, not UTF-8.

				    NonGreedy    = 1<<6,   // Repetition operators are non-greedy by default.

				    PerlClasses  = 1<<7,   // Allow Perl character classes like \d.

				    PerlB        = 1<<8,   // Allow Perl's \b and \B.

				    PerlX        = 1<<9,   // Perl extensions:

				                           //   non-capturing parens - (?: )

				                           //   non-greedy operators - *? +? ?? {}?

				                           //   flag edits - (?i) (?-i) (?i: )

				                           //     i - FoldCase

				                           //     m - !OneLine

				                           //     s - DotNL

				                           //     U - NonGreedy

				                           //   line ends: \A \z

				                           //   \Q and \E to disable/enable metacharacters

				                           //   (?P<name>expr) for named captures

				                           //   \C to match any single byte

				    UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group

				                           //   and \P{Han} for its negation.

				    NeverNL      = 1<<11,  // Never match NL, even if the regexp mentions

				                           //   it explicitly.

				    NeverCapture = 1<<12,  // Parse all parens as non-capturing.

				    // As close to Perl as we can get.

				    LikePerl     = ClassNL | OneLine | PerlClasses | PerlB | PerlX |

				                   UnicodeGroups,

				    // Internal use only.

				    WasDollar    = 1<<15,  // on kRegexpEndText: was $ in regexp text

				  };

				  // Get.  No set, Regexps are logically immutable once created.

				  RegexpOp op() { return static_cast<RegexpOp>(op_); }

				  int nsub() { return nsub_; }

				  bool simple() { return simple_ != 0; }

				  enum ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }

				  int Ref();  // For testing.

				  Regexp** sub() {

				    if(nsub_ <= 1)

				      return &subone_;

				    else

				      return submany_;

				  }

				  int min() { DCHECK_EQ(op_, kRegexpRepeat); return min_; }

				  int max() { DCHECK_EQ(op_, kRegexpRepeat); return max_; }

				  Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; }

				  CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; }

				  int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; }

				  const string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }

				  Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; }

				  int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; }

				  int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; }

				  // Increments reference count, returns object as convenience.

				  Regexp* Incref();

				  // Decrements reference count and deletes this object if count reaches 0.

				  void Decref();

				  // Parses string s to produce regular expression, returned.

				  // Caller must release return value with re->Decref().

				  // On failure, sets *status (if status != NULL) and returns NULL.

				  static Regexp* Parse(const StringPiece& s, ParseFlags flags,

				                       RegexpStatus* status);

				  // Returns a _new_ simplified version of the current regexp.

				  // Does not edit the current regexp.

				  // Caller must release return value with re->Decref().

				  // Simplified means that counted repetition has been rewritten

				  // into simpler terms and all Perl/POSIX features have been

				  // removed.  The result will capture exactly the same

				  // subexpressions the original did, unless formatted with ToString.

				  Regexp* Simplify();

				  friend class CoalesceWalker;

				  friend class SimplifyWalker;

				  // Parses the regexp src and then simplifies it and sets *dst to the

				  // string representation of the simplified form.  Returns true on success.

				  // Returns false and sets *status (if status != NULL) on parse error.

				  static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags,

				                             string* dst,

				                             RegexpStatus* status);

				  // Returns the number of capturing groups in the regexp.

				  int NumCaptures();

				  friend class NumCapturesWalker;

				  // Returns a map from names to capturing group indices,

				  // or NULL if the regexp contains no named capture groups.

				  // The caller is responsible for deleting the map.

				  map<string, int>* NamedCaptures();

				  // Returns a map from capturing group indices to capturing group

				  // names or NULL if the regexp contains no named capture groups. The

				  // caller is responsible for deleting the map.

				  map<int, string>* CaptureNames();

				  // Returns a string representation of the current regexp,

				  // using as few parentheses as possible.

				  string ToString();

				  // Convenience functions.  They consume the passed reference,

				  // so in many cases you should use, e.g., Plus(re->Incref(), flags).

				  // They do not consume allocated arrays like subs or runes.

				  static Regexp* Plus(Regexp* sub, ParseFlags flags);

				  static Regexp* Star(Regexp* sub, ParseFlags flags);

				  static Regexp* Quest(Regexp* sub, ParseFlags flags);

				  static Regexp* Concat(Regexp** subs, int nsubs, ParseFlags flags);

				  static Regexp* Alternate(Regexp** subs, int nsubs, ParseFlags flags);

				  static Regexp* Capture(Regexp* sub, ParseFlags flags, int cap);

				  static Regexp* Repeat(Regexp* sub, ParseFlags flags, int min, int max);

				  static Regexp* NewLiteral(Rune rune, ParseFlags flags);

				  static Regexp* NewCharClass(CharClass* cc, ParseFlags flags);

				  static Regexp* LiteralString(Rune* runes, int nrunes, ParseFlags flags);

				  static Regexp* HaveMatch(int match_id, ParseFlags flags);

				  // Like Alternate but does not factor out common prefixes.

				  static Regexp* AlternateNoFactor(Regexp** subs, int nsubs, ParseFlags flags);

				  // Debugging function.  Returns string format for regexp

				  // that makes structure clear.  Does NOT use regexp syntax.

				  string Dump();

				  // Helper traversal class, defined fully in walker-inl.h.

				  template<typename T> class Walker;

				  // Compile to Prog.  See prog.h

				  // Reverse prog expects to be run over text backward.

				  // Construction and execution of prog will

				  // stay within approximately max_mem bytes of memory.

				  // If max_mem <= 0, a reasonable default is used.

				  Prog* CompileToProg(int64 max_mem);

				  Prog* CompileToReverseProg(int64 max_mem);

				  // Whether to expect this library to find exactly the same answer as PCRE

				  // when running this regexp.  Most regexps do mimic PCRE exactly, but a few

				  // obscure cases behave differently.  Technically this is more a property

				  // of the Prog than the Regexp, but the computation is much easier to do

				  // on the Regexp.  See mimics_pcre.cc for the exact conditions.

				  bool MimicsPCRE();

				  // Benchmarking function.

				  void NullWalk();

				  // Whether every match of this regexp must be anchored and

				  // begin with a non-empty fixed string (perhaps after ASCII

				  // case-folding).  If so, returns the prefix and the sub-regexp that

				  // follows it.

				  bool RequiredPrefix(string* prefix, bool *foldcase, Regexp** suffix);

				 private:

				  // Constructor allocates vectors as appropriate for operator.

				  explicit Regexp(RegexpOp op, ParseFlags parse_flags);

				  // Use Decref() instead of delete to release Regexps.

				  // This is private to catch deletes at compile time.

				  ~Regexp();

				  void Destroy();

				  bool QuickDestroy();

				  // Helpers for Parse.  Listed here so they can edit Regexps.

				  class ParseState;

				  friend class ParseState;

				  friend bool ParseCharClass(StringPiece* s, Regexp** out_re,

				                             RegexpStatus* status);

				  // Helper for testing [sic].

				  friend bool RegexpEqualTestingOnly(Regexp*, Regexp*);

				  // Computes whether Regexp is already simple.

				  bool ComputeSimple();

				  // Constructor that generates a concatenation or alternation,

				  // enforcing the limit on the number of subexpressions for

				  // a particular Regexp.

				  static Regexp* ConcatOrAlternate(RegexpOp op, Regexp** subs, int nsubs,

				                                   ParseFlags flags, bool can_factor);

				  // Returns the leading string that re starts with.

				  // The returned Rune* points into a piece of re,

				  // so it must not be used after the caller calls re->Decref().

				  static Rune* LeadingString(Regexp* re, int* nrune, ParseFlags* flags);

				  // Removes the first n leading runes from the beginning of re.

				  // Edits re in place.

				  static void RemoveLeadingString(Regexp* re, int n);

				  // Returns the leading regexp in re's top-level concatenation.

				  // The returned Regexp* points at re or a sub-expression of re,

				  // so it must not be used after the caller calls re->Decref().

				  static Regexp* LeadingRegexp(Regexp* re);

				  // Removes LeadingRegexp(re) from re and returns the remainder.

				  // Might edit re in place.

				  static Regexp* RemoveLeadingRegexp(Regexp* re);

				  // Simplifies an alternation of literal strings by factoring out

				  // common prefixes.

				  static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags);

				  static int FactorAlternationRecursive(Regexp** sub, int nsub,

				                                        ParseFlags flags, int maxdepth);

				  // Is a == b?  Only efficient on regexps that have not been through

				  // Simplify yet - the expansion of a kRegexpRepeat will make this

				  // take a long time.  Do not call on such regexps, hence private.

				  static bool Equal(Regexp* a, Regexp* b);

				  // Records that this regexp has n sub-expressions and, when there is
				  // more than one, allocates the submany_ array that holds them.
				  // n must be non-negative and representable as a uint16; anything
				  // else is reported with LOG(FATAL).
				  void AllocSub(int n) {
				    const bool fits_in_uint16 = (n >= 0) && (static_cast<uint16>(n) == n);
				    if (!fits_in_uint16) {
				      LOG(FATAL) << "Cannot AllocSub " << n;
				    }
				    // Zero or one sub-expression needs no separately allocated array.
				    if (n >= 2) {
				      submany_ = new Regexp*[n];
				    }
				    nsub_ = n;
				  }

				  // Add Rune to LiteralString

				  void AddRuneToString(Rune r);

				  // Swaps this with that, in place.

				  void Swap(Regexp *that);

				  // Operator.  See description of operators above.

				  // uint8 instead of RegexpOp to control space usage.

				  uint8 op_;

				  // Is this regexp structure already simple

				  // (has it been returned by Simplify)?

				  // uint8 instead of bool to control space usage.

				  uint8 simple_;

				  // Flags saved from parsing and used during execution.

				  // (Only FoldCase is used.)

				  // uint16 instead of ParseFlags to control space usage.

				  uint16 parse_flags_;

				  // Reference count.  Exists so that SimplifyRegexp can build

				  // regexp structures that are dags rather than trees to avoid

				  // exponential blowup in space requirements.

				  // uint16 to control space usage.

				  // The standard regexp routines will never generate a

				  // ref greater than the maximum repeat count (100),

				  // but even so, Incref and Decref consult an overflow map

				  // when ref_ reaches kMaxRef.

				  uint16 ref_;

				  static const uint16 kMaxRef = 0xffff;

				  // Subexpressions.

				  // uint16 to control space usage.

				  // Concat and Alternate handle larger numbers of subexpressions

				  // by building concatenation or alternation trees.

				  // Other routines should call Concat or Alternate instead of

				  // filling in sub() by hand.

				  uint16 nsub_;

				  static const uint16 kMaxNsub = 0xffff;

				  union {

				    Regexp** submany_;  // if nsub_ > 1

				    Regexp* subone_;  // if nsub_ == 1

				  };

				  // Extra space for parse and teardown stacks.

				  Regexp* down_;

				  // Arguments to operator.  See description of operators above.

				  union {

				    struct {  // Repeat

				      int max_;

				      int min_;

				    };

				    struct {  // Capture

				      int cap_;

				      string* name_;

				    };

				    struct {  // LiteralString

				      int nrunes_;

				      Rune* runes_;

				    };

				    struct {  // CharClass

				      // These two could be in separate union members,

				      // but it wouldn't save any space (there are other two-word structs)

				      // and keeping them separate avoids confusion during parsing.

				      CharClass* cc_;

				      CharClassBuilder* ccb_;

				    };

				    Rune rune_;  // Literal

				    int match_id_;  // HaveMatch

				    void *the_union_[2];  // as big as any other element, for memset

				  };

				  DISALLOW_COPY_AND_ASSIGN(Regexp);

				};

				// Character class set: contains non-overlapping, non-abutting RuneRanges.

				typedef set<RuneRange, RuneRangeLess> RuneRangeSet;

				// Mutable character class under construction.  The member runes are kept
				// in ranges_ (a RuneRangeSet); nrunes_ is the value reported by size().
				// Copying is disabled via DISALLOW_COPY_AND_ASSIGN; use Copy() instead.
				class CharClassBuilder {

				 public:

				  CharClassBuilder();

				  // Iteration is over the stored RuneRanges, not over individual runes.
				  typedef RuneRangeSet::iterator iterator;

				  iterator begin() { return ranges_.begin(); }

				  iterator end() { return ranges_.end(); }

				  // Returns nrunes_ (note: not the number of ranges in ranges_).
				  int size() { return nrunes_; }

				  // True when nrunes_ is zero.
				  bool empty() { return nrunes_ == 0; }

				  // True when nrunes_ equals Runemax+1.
				  bool full() { return nrunes_ == Runemax+1; }

				  bool Contains(Rune r);

				  bool FoldsASCII();

				  bool AddRange(Rune lo, Rune hi);  // returns whether class changed

				  // Returns a CharClassBuilder*; NOTE(review): presumably a newly
				  // allocated duplicate owned by the caller - confirm in the .cc file.
				  CharClassBuilder* Copy();

				  void AddCharClass(CharClassBuilder* cc);

				  void Negate();

				  void RemoveAbove(Rune r);

				  CharClass* GetCharClass();

				  void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags);

				 private:

				  // Low 26 bits set: one bit per letter for upper_ and lower_ below.
				  static const uint32 AlphaMask = (1<<26) - 1;

				  uint32 upper_;  // bitmap of A-Z

				  uint32 lower_;  // bitmap of a-z

				  // Value returned by size(); compared against 0 and Runemax+1 by
				  // empty() and full().
				  int nrunes_;

				  // The ranges making up the class; exposed through begin()/end().
				  RuneRangeSet ranges_;

				  DISALLOW_COPY_AND_ASSIGN(CharClassBuilder);

				};

				// Bitwise operators on ParseFlags are defined explicitly so that g++
				// treats their results as ParseFlags rather than plain int.
				// Bitwise OR of two flag sets.
				inline Regexp::ParseFlags operator|(Regexp::ParseFlags a, Regexp::ParseFlags b)
				{
				  const int combined = static_cast<int>(a) | static_cast<int>(b);
				  return static_cast<Regexp::ParseFlags>(combined);
				}

				// Bitwise XOR of two flag sets, typed as ParseFlags.
				inline Regexp::ParseFlags operator^(Regexp::ParseFlags a, Regexp::ParseFlags b)
				{
				  const int toggled = static_cast<int>(a) ^ static_cast<int>(b);
				  return static_cast<Regexp::ParseFlags>(toggled);
				}

				// Bitwise AND of two flag sets, typed as ParseFlags.
				inline Regexp::ParseFlags operator&(Regexp::ParseFlags a, Regexp::ParseFlags b)
				{
				  const int common = static_cast<int>(a) & static_cast<int>(b);
				  return static_cast<Regexp::ParseFlags>(common);
				}

				// Bitwise complement of a flag set, typed as ParseFlags.
				inline Regexp::ParseFlags operator~(Regexp::ParseFlags a)
				{
				  const int inverted = ~static_cast<int>(a);
				  return static_cast<Regexp::ParseFlags>(inverted);
				}

				}  // namespace re2

				#endif  // RE2_REGEXP_H__

									
										113

third_party/re2/re2/set.cc
									
										vendored
									
				@ -1,113 +0,0 @@

				// Copyright 2010 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				#include "re2/set.h"

				#include "util/util.h"

				#include "re2/stringpiece.h"

				#include "re2/prog.h"

				#include "re2/re2.h"

				#include "re2/regexp.h"

				using namespace re2;

				// Creates an empty, not-yet-compiled set that keeps its own copy of
				// `options` together with the requested `anchor`.
				RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
				    : anchor_(anchor),
				      prog_(NULL),
				      compiled_(false) {
				  options_.Copy(options);
				}

				// Drops the reference held on every regexp still stored in re_ and
				// deletes the compiled program, if there is one.
				RE2::Set::~Set() {
				  const size_t count = re_.size();
				  for (size_t idx = 0; idx != count; ++idx) {
				    re_[idx]->Decref();
				  }
				  delete prog_;
				}

				int RE2::Set::Add(const StringPiece& pattern, string* error) {

				  if (compiled_) {

				    LOG(DFATAL) << "RE2::Set::Add after Compile";

				    return -1;

				  }

				  Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(

				    options_.ParseFlags());

				  RegexpStatus status;

				  re2::Regexp* re = Regexp::Parse(pattern, pf, &status);

				  if (re == NULL) {

				    if (error != NULL)

				      *error = status.Text();

				    if (options_.log_errors())

				      LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();

				    return -1;

				  }

				  // Concatenate with match index and push on vector.

				  int n = static_cast<int>(re_.size());

				  re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);

				  if (re->op() == kRegexpConcat) {

				    int nsub = re->nsub();

				    re2::Regexp** sub = new re2::Regexp*[nsub + 1];

				    for (int i = 0; i < nsub; i++)

				      sub[i] = re->sub()[i]->Incref();

				    sub[nsub] = m;

				    re->Decref();

				    re = re2::Regexp::Concat(sub, nsub + 1, pf);

				    delete[] sub;

				  } else {

				    re2::Regexp* sub[2];

				    sub[0] = re;

				    sub[1] = m;

				    re = re2::Regexp::Concat(sub, 2, pf);

				  }

				  re_.push_back(re);

				  return n;

				}

				// Combines every regexp added so far into one alternation, simplifies
				// it and compiles it into prog_.
				// Returns true if a program was produced.  Returns false if Compile()
				// was already called, if simplification yields NULL, or if
				// Prog::CompileSet returns NULL.  compiled_ is set on the first call
				// regardless of the outcome, so Add() and Compile() are rejected
				// afterwards.
				bool RE2::Set::Compile() {
				  if (compiled_) {
				    LOG(DFATAL) << "RE2::Set::Compile multiple times";
				    return false;
				  }
				  compiled_ = true;

				  Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
				    options_.ParseFlags());
				  re2::Regexp* re = re2::Regexp::Alternate(const_cast<re2::Regexp**>(re_.data()),
				                                           static_cast<int>(re_.size()), pf);
				  // Forget the pointers just handed to Alternate so that the destructor
				  // does not Decref them a second time.
				  re_.clear();

				  re2::Regexp* sre = re->Simplify();
				  re->Decref();
				  re = sre;
				  if (re == NULL) {
				    if (options_.log_errors())
				      LOG(ERROR) << "Error simplifying during Compile.";
				    return false;
				  }

				  prog_ = Prog::CompileSet(options_, anchor_, re);
				  // Simplify() hands back a reference the caller must release; without
				  // this Decref the simplified regexp leaked on every Compile().
				  // NOTE(review): assumes Prog::CompileSet does not take ownership of
				  // `re` - confirm against its definition.
				  re->Decref();
				  return prog_ != NULL;
				}

				// Runs the compiled set over `text`.
				// On a match, returns true and fills *v with the indices (as returned
				// by Add) of the matching regexps.  Returns false if nothing matched,
				// if the set was never compiled, if compilation failed, or if the DFA
				// reports a match without any regexp index.  v must not be NULL; it is
				// cleared before the search.
				bool RE2::Set::Match(const StringPiece& text, vector<int>* v) const {
				  if (!compiled_) {
				    LOG(DFATAL) << "RE2::Set::Match without Compile";
				    return false;
				  }

				  // Compile() sets compiled_ before it builds prog_, so after a failed
				  // Compile() compiled_ is true while prog_ is still NULL.  Bail out
				  // instead of dereferencing a NULL program.
				  if (prog_ == NULL) {
				    LOG(DFATAL) << "RE2::Set::Match after failed Compile";
				    return false;
				  }

				  v->clear();
				  bool failed = false;
				  bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
				                              Prog::kManyMatch, NULL, &failed, v);
				  if (failed)
				    LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space";

				  if (ret == false)
				    return false;

				  // A successful search must identify at least one regexp.
				  if (v->size() == 0) {
				    LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
				    return false;
				  }

				  return true;
				}

									
										55

third_party/re2/re2/set.h
									
										vendored
									
				@ -1,55 +0,0 @@

				// Copyright 2010 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				#ifndef RE2_SET_H

				#define RE2_SET_H

				#include <utility>

				#include <vector>

				#include "re2/re2.h"

				namespace re2 {

				using std::vector;

				// An RE2::Set represents a collection of regexps that can

				// be searched for simultaneously.
				// Usage: construct, call Add() once per pattern, call Compile() once,
				// then call Match().

				class RE2::Set {

				 public:

				  // Creates an empty set that uses a copy of `options` and the given
				  // `anchor` when it is compiled.
				  Set(const RE2::Options& options, RE2::Anchor anchor);

				  ~Set();

				  // Add adds regexp pattern to the set, interpreted using the RE2 options.

				  // (The RE2 constructor's default options parameter is RE2::UTF8.)

				  // Add returns the regexp index that will be used to identify

				  // it in the result of Match, or -1 if the regexp cannot be parsed.

				  // Indices are assigned in sequential order starting from 0.

				  // Error returns do not increment the index.

				  // If an error occurs and error != NULL, *error will hold an error message.

				  int Add(const StringPiece& pattern, string* error);

				  // Compile prepares the Set for matching.

				  // Add must not be called again after Compile.

				  // Compile must be called before Match.

				  // Compile may return false if it runs out of memory.

				  bool Compile();

				  // Match returns true if text matches any of the regexps in the set.

				  // If so, it fills v with the indices of the matching regexps.

				  bool Match(const StringPiece& text, vector<int>* v) const;

				 private:

				  // Private copy of the options passed to the constructor.
				  RE2::Options options_;

				  // Anchor passed to the constructor.
				  RE2::Anchor anchor_;

				  // Regexps collected by Add(); the vector index is the value Add()
				  // returned for that regexp.
				  vector<re2::Regexp*> re_;

				  // Compiled program; NULL until Compile() succeeds.
				  re2::Prog* prog_;

				  // True once Compile() has been called, whether or not it succeeded.
				  bool compiled_;

				  // Copying is disabled by declaring these private, spelled out by
				  // hand instead of using the macro below.
				  //DISALLOW_COPY_AND_ASSIGN(Set);

				  Set(const Set&);

				  void operator=(const Set&);

				};

				}  // namespace re2

				#endif  // RE2_SET_H

									
										652

third_party/re2/re2/simplify.cc
									
										vendored
									
				@ -1,652 +0,0 @@

				// Copyright 2006 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Rewrite POSIX and other features in re

				// to use simple extended regular expression features.

				// Also sort and simplify character classes.

				#include "util/util.h"

				#include "re2/regexp.h"

				#include "re2/walker-inl.h"

				namespace re2 {

				// Parses the regexp src and then simplifies it and sets *dst to the

				// string representation of the simplified form.  Returns true on success.

				// Returns false and sets *error (if error != NULL) on error.

				bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,

				                            string* dst,

				                            RegexpStatus* status) {

				  Regexp* re = Parse(src, flags, status);

				  if (re == NULL)

				    return false;

				  Regexp* sre = re->Simplify();

				  re->Decref();

				  if (sre == NULL) {

				    // Should not happen, since Simplify never fails.

				    LOG(ERROR) << "Simplify failed on " << src;

				    if (status) {

				      status->set_code(kRegexpInternalError);

				      status->set_error_arg(src);

				    }

				    return false;

				  }

				  *dst = sre->ToString();

				  sre->Decref();

				  return true;

				}

				// Assuming the simple_ flags on the children are accurate,

				// is this Regexp* simple?

				bool Regexp::ComputeSimple() {

				  Regexp** subs;

				  switch (op_) {

				    case kRegexpNoMatch:

				    case kRegexpEmptyMatch:

				    case kRegexpLiteral:

				    case kRegexpLiteralString:

				    case kRegexpBeginLine:

				    case kRegexpEndLine:

				    case kRegexpBeginText:

				    case kRegexpWordBoundary:

				    case kRegexpNoWordBoundary:

				    case kRegexpEndText:

				    case kRegexpAnyChar:

				    case kRegexpAnyByte:

				    case kRegexpHaveMatch:

				      return true;

				    case kRegexpConcat:

				    case kRegexpAlternate:

				      // These are simple as long as the subpieces are simple.

				      subs = sub();

				      for (int i = 0; i < nsub_; i++)

				        if (!subs[i]->simple())

				          return false;

				      return true;

				    case kRegexpCharClass:

				      // Simple as long as the char class is not empty, not full.

				      if (ccb_ != NULL)

				        return !ccb_->empty() && !ccb_->full();

				      return !cc_->empty() && !cc_->full();

				    case kRegexpCapture:

				      subs = sub();

				      return subs[0]->simple();

				    case kRegexpStar:

				    case kRegexpPlus:

				    case kRegexpQuest:

				      subs = sub();

				      if (!subs[0]->simple())

				        return false;

				      switch (subs[0]->op_) {

				        case kRegexpStar:

				        case kRegexpPlus:

				        case kRegexpQuest:

				        case kRegexpEmptyMatch:

				        case kRegexpNoMatch:

				          return false;

				        default:

				          break;

				      }

				      return true;

				    case kRegexpRepeat:

				      return false;

				  }

				  LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;

				  return false;

				}

				// Walker subclass used by Simplify.

				// Coalesces runs of star/plus/quest/repeat of the same literal along with any

				// occurrences of that literal into repeats of that literal. It also works for

				// char classes, any char and any byte.

				// PostVisit creates the coalesced result, which should then be simplified.

				class CoalesceWalker : public Regexp::Walker<Regexp*> {

				 public:

				  CoalesceWalker() {}

				  // Builds the (possibly coalesced) replacement for re from child_args,
				  // the already-visited children.  The result carries a reference that
				  // the caller must release.
				  virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,

				                            Regexp** child_args, int nchild_args);

				  // Returns re with an additional reference (re->Incref()).
				  virtual Regexp* Copy(Regexp* re);

				  // Not expected to be called, since Walk is used rather than
				  // WalkExponential; logs DFATAL and returns re->Incref().
				  virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);

				 private:

				  // These functions are declared inside CoalesceWalker so that

				  // they can edit the private fields of the Regexps they construct.

				  // Returns true if r1 and r2 can be coalesced. In particular, ensures that

				  // the parse flags are consistent. (They will not be checked again later.)

				  static bool CanCoalesce(Regexp* r1, Regexp* r2);

				  // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards

				  // will be empty match and the coalesced op. In other cases, where part of a

				  // literal string was removed to be coalesced, the array elements afterwards

				  // will be the coalesced op and the remainder of the literal string.

				  static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr);

				  DISALLOW_COPY_AND_ASSIGN(CoalesceWalker);

				};

				// Walker subclass used by Simplify.

				// The simplify walk is purely post-recursive: given the simplified children,

				// PostVisit creates the simplified result.

				// The child_args are simplified Regexp*s.

				class SimplifyWalker : public Regexp::Walker<Regexp*> {

				 public:

				  SimplifyWalker() {}

				  // If re is already simple, sets *stop and returns re->Incref() so the
				  // subtree is not walked; otherwise returns NULL and the walk continues.
				  virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop);

				  // Builds the simplified form of re from its simplified children.
				  virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,

				                            Regexp** child_args, int nchild_args);

				  // Returns re with an additional reference (re->Incref()).
				  virtual Regexp* Copy(Regexp* re);

				  // Not expected to be called, since Walk is used rather than
				  // WalkExponential; logs DFATAL and returns re->Incref().
				  virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);

				 private:

				  // These functions are declared inside SimplifyWalker so that

				  // they can edit the private fields of the Regexps they construct.

				  // Creates a concatenation of two Regexp, consuming refs to re1 and re2.

				  // Caller must Decref return value when done with it.

				  static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags);

				  // Simplifies the expression re{min,max} in terms of *, +, and ?.

				  // Returns a new regexp.  Does not edit re.  Does not consume reference to re.

				  // Caller must Decref return value when done with it.

				  static Regexp* SimplifyRepeat(Regexp* re, int min, int max,

				                                Regexp::ParseFlags parse_flags);

				  // Simplifies a character class by expanding any named classes

				  // into rune ranges.  Does not edit re.  Does not consume ref to re.

				  // Caller must Decref return value when done with it.

				  static Regexp* SimplifyCharClass(Regexp* re);

				  DISALLOW_COPY_AND_ASSIGN(SimplifyWalker);

				};

				// Produces a simplified copy of this regexp in two passes: a
				// CoalesceWalker pass followed by a SimplifyWalker pass over its result.
				// The result uses traditional Unix egrep features only, plus the Perl
				// (?:) non-capturing parentheses, and captures exactly the same
				// subexpressions (with the same indices) as the original.
				// The current object is not edited.
				// Caller must Decref() the return value when done with it.
				Regexp* Regexp::Simplify() {
				  CoalesceWalker coalescer;
				  Regexp* coalesced = coalescer.Walk(this, NULL);
				  if (coalesced == NULL)
				    return NULL;

				  SimplifyWalker simplifier;
				  Regexp* simplified = simplifier.Walk(coalesced, NULL);
				  // The intermediate coalesced form is no longer needed.
				  coalesced->Decref();
				  return simplified;
				}

				#define Simplify DontCallSimplify  // Avoid accidental recursion

				// Helper for PostVisit implementations: reports whether any entry of
				// child_args differs from the corresponding re->sub() entry.
				// If something differs, returns true and leaves child_args untouched.
				// If nothing differs (the common case), Decref()s every child_args entry
				// and returns false; PostVisit must then return re->Incref().
				static bool ChildArgsChanged(Regexp* re, Regexp** child_args) {
				  const int count = re->nsub();

				  int k = 0;
				  while (k < count && child_args[k] == re->sub()[k])
				    k++;
				  if (k < count)
				    return true;

				  // Identical children: release the references the walk handed us.
				  for (k = 0; k < count; k++)
				    child_args[k]->Decref();
				  return false;
				}

				// Copying a walk result just takes another reference to it.
				Regexp* CoalesceWalker::Copy(Regexp* re) {
				  Regexp* shared = re->Incref();
				  return shared;
				}

				// Not expected to run: Simplify uses Walk rather than WalkExponential.
				// Logs DFATAL and falls back to returning re with an extra reference.
				Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
				  LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
				  Regexp* fallback = re->Incref();
				  return fallback;
				}

				// Builds the coalesced replacement for re from its already-visited
				// children in child_args.
				//  - Nodes without children are returned as-is (with a new reference).
				//  - For anything other than a concatenation, or a concatenation with no
				//    adjacent pair that CanCoalesce accepts, re is reused when no child
				//    changed; otherwise a node of the same op is rebuilt around
				//    child_args.
				//  - Otherwise adjacent children are merged with DoCoalesce and the
				//    empty matches it leaves behind are dropped from the new node.
				Regexp* CoalesceWalker::PostVisit(Regexp* re,
				                                  Regexp* parent_arg,
				                                  Regexp* pre_arg,
				                                  Regexp** child_args,
				                                  int nchild_args) {
				  const int nsub = re->nsub();
				  if (nsub == 0)
				    return re->Incref();

				  // Only a concatenation is checked for adjacent children to merge.
				  bool mergeable = false;
				  if (re->op() == kRegexpConcat) {
				    for (int k = 0; k + 1 < nsub && !mergeable; k++)
				      mergeable = CanCoalesce(child_args[k], child_args[k + 1]);
				  }

				  if (!mergeable) {
				    if (!ChildArgsChanged(re, child_args))
				      return re->Incref();

				    // Something changed. Build a new op around the new children.
				    Regexp* rebuilt = new Regexp(re->op(), re->parse_flags());
				    rebuilt->AllocSub(nsub);
				    Regexp** slots = rebuilt->sub();
				    for (int k = 0; k < nsub; k++)
				      slots[k] = child_args[k];

				    // Repeats and Captures have additional data that must be copied.
				    if (re->op() == kRegexpRepeat) {
				      rebuilt->min_ = re->min();
				      rebuilt->max_ = re->max();
				    } else if (re->op() == kRegexpCapture) {
				      rebuilt->cap_ = re->cap();
				    }
				    return rebuilt;
				  }

				  // Merge left to right; each merge rewrites the pair in place, so the
				  // next test sees the result of the previous one.
				  for (int k = 0; k + 1 < nsub; k++) {
				    if (CanCoalesce(child_args[k], child_args[k + 1]))
				      DoCoalesce(&child_args[k], &child_args[k + 1]);
				  }

				  // Determine how many empty matches were left by DoCoalesce.
				  int empties = 0;
				  for (int k = 0; k < nsub; k++) {
				    if (child_args[k]->op() == kRegexpEmptyMatch)
				      empties++;
				  }

				  // Build a new op holding only the surviving children.
				  Regexp* merged = new Regexp(re->op(), re->parse_flags());
				  merged->AllocSub(nsub - empties);
				  Regexp** out = merged->sub();
				  int next = 0;
				  for (int k = 0; k < nsub; k++) {
				    Regexp* child = child_args[k];
				    if (child->op() != kRegexpEmptyMatch) {
				      out[next] = child;
				      next++;
				    } else {
				      child->Decref();
				    }
				  }
				  return merged;
				}

				// Reports whether the adjacent pair (r1, r2) can be merged by DoCoalesce.
				// r1 must be a star/plus/quest/repeat whose operand is a literal, char
				// class, any char or any byte.  r2 must then be one of:
				//   1. a star/plus/quest/repeat of an equal operand with the same
				//      NonGreedy flag as r1;
				//   2. a plain occurrence of that operand;
				//   3. a literal string starting with that literal, with the same
				//      FoldCase flag as the literal.
				bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) {
				  const bool r1_repeats = r1->op() == kRegexpStar ||
				                          r1->op() == kRegexpPlus ||
				                          r1->op() == kRegexpQuest ||
				                          r1->op() == kRegexpRepeat;
				  if (!r1_repeats)
				    return false;

				  Regexp* operand = r1->sub()[0];
				  const bool operand_ok = operand->op() == kRegexpLiteral ||
				                          operand->op() == kRegexpCharClass ||
				                          operand->op() == kRegexpAnyChar ||
				                          operand->op() == kRegexpAnyByte;
				  if (!operand_ok)
				    return false;

				  // Case 1: r2 repeats the same operand; the parse flags must be
				  // consistent.
				  const bool r2_repeats = r2->op() == kRegexpStar ||
				                          r2->op() == kRegexpPlus ||
				                          r2->op() == kRegexpQuest ||
				                          r2->op() == kRegexpRepeat;
				  if (r2_repeats &&
				      Regexp::Equal(operand, r2->sub()[0]) &&
				      ((r1->parse_flags() & Regexp::NonGreedy) ==
				       (r2->parse_flags() & Regexp::NonGreedy))) {
				    return true;
				  }

				  // Case 2: r2 is a single occurrence of the operand itself.
				  if (Regexp::Equal(operand, r2))
				    return true;

				  // Case 3: r2 is a literal string that begins with the literal.
				  if (operand->op() != kRegexpLiteral || r2->op() != kRegexpLiteralString)
				    return false;
				  if (r2->runes()[0] != operand->rune())
				    return false;
				  // The parse flags must be consistent.
				  return (operand->parse_flags() & Regexp::FoldCase) ==
				         (r2->parse_flags() & Regexp::FoldCase);
				}

				// Merges the adjacent pair *r1ptr, *r2ptr into a single kRegexpRepeat of
				// r1's operand and writes the outcome back through the two pointers:
				// normally (empty match, repeat); when only a prefix of a literal string
				// was absorbed, (repeat, remaining literal string).
				// On success the references to the original *r1ptr and *r2ptr are
				// released.  If either op is unexpected, DFATAL is logged and both slots
				// are left unchanged.
				// Callers are expected to have checked the pair with CanCoalesce first.
				void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {

				  Regexp* r1 = *r1ptr;

				  Regexp* r2 = *r2ptr;

				  // Start from a repeat of r1's operand with placeholder bounds (0, 0);
				  // min_/max_ are filled in below.  The operand gets its own reference.
				  Regexp* nre = Regexp::Repeat(

				      r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0);

				  // Step 1: express r1 itself as {min_, max_}.  max_ == -1 is used for
				  // star and plus, i.e. the cases without an upper bound.
				  switch (r1->op()) {

				    case kRegexpStar:

				      nre->min_ = 0;

				      nre->max_ = -1;

				      break;

				    case kRegexpPlus:

				      nre->min_ = 1;

				      nre->max_ = -1;

				      break;

				    case kRegexpQuest:

				      nre->min_ = 0;

				      nre->max_ = 1;

				      break;

				    case kRegexpRepeat:

				      nre->min_ = r1->min();

				      nre->max_ = r1->max();

				      break;

				    default:

				      LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();

				      // Drop the unused repeat; *r1ptr and *r2ptr stay as they were.
				      nre->Decref();

				      return;

				  }

				  // Step 2: add r2's contribution to the bounds.  A max_ of -1 is
				  // never incremented.
				  switch (r2->op()) {

				    case kRegexpStar:

				      nre->max_ = -1;

				      goto LeaveEmpty;

				    case kRegexpPlus:

				      nre->min_++;

				      nre->max_ = -1;

				      goto LeaveEmpty;

				    case kRegexpQuest:

				      if (nre->max() != -1)

				        nre->max_++;

				      goto LeaveEmpty;

				    case kRegexpRepeat:

				      nre->min_ += r2->min();

				      if (r2->max() == -1)

				        nre->max_ = -1;

				      else if (nre->max() != -1)

				        nre->max_ += r2->max();

				      goto LeaveEmpty;

				    // A single occurrence of the operand adds exactly one repetition.
				    case kRegexpLiteral:

				    case kRegexpCharClass:

				    case kRegexpAnyChar:

				    case kRegexpAnyByte:

				      nre->min_++;

				      if (nre->max() != -1)

				        nre->max_++;

				      goto LeaveEmpty;

				    // Shared exit for every case in which r2 was absorbed completely:
				    // the first slot becomes an empty match and the second the repeat.
				    LeaveEmpty:

				      *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags);

				      *r2ptr = nre;

				      break;

				    case kRegexpLiteralString: {

				      Rune r = r1->sub()[0]->rune();

				      // Determine how much of the literal string is removed.

				      // We know that we have at least one rune. :)

				      int n = 1;

				      while (n < r2->nrunes() && r2->runes()[n] == r)

				        n++;

				      nre->min_ += n;

				      if (nre->max() != -1)

				        nre->max_ += n;

				      // The whole string consisted of r: nothing remains of r2.
				      if (n == r2->nrunes())

				        goto LeaveEmpty;

				      // Otherwise the repeat goes first, followed by a new literal
				      // string made of the runes that were not absorbed.
				      *r1ptr = nre;

				      *r2ptr = Regexp::LiteralString(

				          &r2->runes()[n], r2->nrunes() - n, r2->parse_flags());

				      break;

				    }

				    default:

				      LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();

				      // Drop the unused repeat; *r1ptr and *r2ptr stay as they were.
				      nre->Decref();

				      return;

				  }

				  // Both slots now hold replacements, so release the originals.
				  r1->Decref();

				  r2->Decref();

				}

				// Copying a walk result just takes another reference to it.
				Regexp* SimplifyWalker::Copy(Regexp* re) {
				  Regexp* shared = re->Incref();
				  return shared;
				}

				// Not expected to run: Simplify uses Walk rather than WalkExponential.
				// Logs DFATAL and falls back to returning re with an extra reference.
				Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
				  LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
				  Regexp* fallback = re->Incref();
				  return fallback;
				}

				// Short-circuits the walk for subtrees that are already simple: sets
				// *stop and hands back re with a new reference.  For anything else it
				// returns NULL and leaves *stop untouched so the children get visited.
				Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
				  if (!re->simple())
				    return NULL;

				  *stop = true;
				  return re->Incref();
				}

				// Called after the children of re have been visited; child_args holds the
				// result produced for each child.  Returns the simplified form of re as
				// a reference owned by the caller.
				Regexp* SimplifyWalker::PostVisit(Regexp* re,
				                                  Regexp* parent_arg,
				                                  Regexp* pre_arg,
				                                  Regexp** child_args,
				                                  int nchild_args) {
				  switch (re->op()) {
				    // Operators that are simple by construction.
				    case kRegexpNoMatch:
				    case kRegexpEmptyMatch:
				    case kRegexpLiteral:
				    case kRegexpLiteralString:
				    case kRegexpBeginLine:
				    case kRegexpEndLine:
				    case kRegexpBeginText:
				    case kRegexpWordBoundary:
				    case kRegexpNoWordBoundary:
				    case kRegexpEndText:
				    case kRegexpAnyChar:
				    case kRegexpAnyByte:
				    case kRegexpHaveMatch:
				      re->simple_ = true;
				      return re->Incref();

				    // Simple exactly when every subpiece is simple.
				    case kRegexpConcat:
				    case kRegexpAlternate: {
				      if (ChildArgsChanged(re, child_args)) {
				        // At least one child was rewritten: build a new node of the
				        // same kind that takes over the child_args entries.
				        Regexp* rebuilt = new Regexp(re->op(), re->parse_flags());
				        rebuilt->AllocSub(re->nsub());
				        Regexp** slots = rebuilt->sub();
				        for (int k = 0; k < re->nsub(); k++)
				          slots[k] = child_args[k];
				        rebuilt->simple_ = true;
				        return rebuilt;
				      }
				      re->simple_ = true;
				      return re->Incref();
				    }

				    case kRegexpCapture: {
				      Regexp* child = child_args[0];
				      if (child != re->sub()[0]) {
				        // The sub-expression changed: wrap it in a new capture that
				        // keeps the original capture index.
				        Regexp* wrapped = new Regexp(kRegexpCapture, re->parse_flags());
				        wrapped->AllocSub(1);
				        wrapped->sub()[0] = child;
				        wrapped->cap_ = re->cap();
				        wrapped->simple_ = true;
				        return wrapped;
				      }
				      // Unchanged: drop the extra child reference and reuse re.
				      child->Decref();
				      re->simple_ = true;
				      return re->Incref();
				    }

				    case kRegexpStar:
				    case kRegexpPlus:
				    case kRegexpQuest: {
				      Regexp* child = child_args[0];

				      // Repeating the empty string any number of times
				      // still yields the empty string.
				      if (child->op() == kRegexpEmptyMatch)
				        return child;

				      // Simple as long as the subpiece is simple.
				      const bool child_unchanged = (child == re->sub()[0]);
				      if (child_unchanged) {
				        child->Decref();
				        re->simple_ = true;
				        return re->Incref();
				      }

				      // Applying the same operator twice with the same flags is
				      // idempotent, so the child already is the answer.
				      if (re->op() == child->op() &&
				          re->parse_flags() == child->parse_flags())
				        return child;

				      Regexp* repeated = new Regexp(re->op(), re->parse_flags());
				      repeated->AllocSub(1);
				      repeated->sub()[0] = child;
				      repeated->simple_ = true;
				      return repeated;
				    }

				    case kRegexpRepeat: {
				      Regexp* child = child_args[0];

				      // Repeating the empty string any number of times
				      // still yields the empty string.
				      if (child->op() == kRegexpEmptyMatch)
				        return child;

				      // SimplifyRepeat does not consume child, so release it afterwards.
				      Regexp* expanded = SimplifyRepeat(child, re->min_, re->max_,
				                                        re->parse_flags());
				      child->Decref();
				      expanded->simple_ = true;
				      return expanded;
				    }

				    case kRegexpCharClass: {
				      Regexp* reduced = SimplifyCharClass(re);
				      reduced->simple_ = true;
				      return reduced;
				    }
				  }

				  LOG(ERROR) << "Simplify case not handled: " << re->op();
				  return re->Incref();
				}

				// Builds the two-element concatenation re1 re2.  The references to re1
				// and re2 are consumed; the returned Regexp is owned by the caller.
				Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2,
				                                Regexp::ParseFlags parse_flags) {
				  Regexp* pair = new Regexp(kRegexpConcat, parse_flags);
				  pair->AllocSub(2);
				  Regexp** slots = pair->sub();
				  slots[1] = re2;
				  slots[0] = re1;
				  return pair;
				}

				// Rewrites re{min,max} using only concatenation, *, + and ?.
				// A max of -1 means "no upper bound".
				// re is neither modified nor consumed; the result is a new reference that
				// the caller must Decref.
				// Note that printing the result with ToString() and re-parsing it does
				// *not* necessarily give the same capture numbering: (x){2} becomes
				// (x)(x), yet in the Regexp* representation both (x) are marked as $1.
				Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
				                                       Regexp::ParseFlags f) {
				  // Unbounded: x{n,} is at least n matches of x.
				  if (max == -1) {
				    switch (min) {
				      case 0:  // x{0,} is x*
				        return Regexp::Star(re->Incref(), f);
				      case 1:  // x{1,} is x+
				        return Regexp::Plus(re->Incref(), f);
				      default:
				        break;
				    }

				    // In general x{4,} is xxxx+: min-1 plain copies, then x+.
				    Regexp* seq = new Regexp(kRegexpConcat, f);
				    seq->AllocSub(min);
				    Regexp** slots = seq->sub();
				    for (int k = 0; k < min-1; k++)
				      slots[k] = re->Incref();
				    slots[min-1] = Regexp::Plus(re->Incref(), f);
				    return seq;
				  }

				  // (x){0} matches only the empty string.
				  if (min == 0 && max == 0)
				    return new Regexp(kRegexpEmptyMatch, f);

				  // x{1} is just x.
				  if (min == 1 && max == 1)
				    return re->Incref();

				  // Bounded: x{n,m} is n copies of x followed by up to m-n optional
				  // copies.  Nesting the optional copies gives the matcher less work,
				  // so x{2,5} = xx(x(x(x)?)?)?

				  // Mandatory prefix: min copies of x.
				  Regexp* result = NULL;
				  if (min > 0) {
				    result = new Regexp(kRegexpConcat, f);
				    result->AllocSub(min);
				    Regexp** slots = result->sub();
				    for (int k = 0; k < min; k++)
				      slots[k] = re->Incref();
				  }

				  // Optional suffix: (x(x(x)?)?)?, built from the innermost x? outwards.
				  if (max > min) {
				    Regexp* tail = Regexp::Quest(re->Incref(), f);
				    for (int k = min+1; k < max; k++)
				      tail = Regexp::Quest(Concat2(re->Incref(), tail, f), f);
				    result = (result == NULL) ? tail : Concat2(result, tail, f);
				  }

				  if (result != NULL)
				    return result;

				  // Some degenerate case, like min > max, or min < max < 0.
				  // The parser rejects such regexps, so this should not happen.
				  LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max;
				  return new Regexp(kRegexpNoMatch, f);
				}

				// Simplifies a character class: an empty class becomes "no match", a
				// full class becomes "any character", anything else is returned as is.
				// The caller must Decref the result.
				Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) {
				  CharClass* ranges = re->cc();

				  if (ranges->empty()) {
				    Regexp* never = new Regexp(kRegexpNoMatch, re->parse_flags());
				    return never;
				  }
				  if (ranges->full()) {
				    Regexp* any = new Regexp(kRegexpAnyChar, re->parse_flags());
				    return any;
				  }
				  return re->Incref();
				}

				}  // namespace re2

									
										98

third_party/re2/re2/stringpiece.cc
									
										vendored
									
				@ -1,98 +0,0 @@

				// Copyright 2004 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				#include "re2/stringpiece.h"

				#include "util/util.h"

				using re2::StringPiece;

				// Streams the raw bytes of piece (embedded NULs included) to o.
				std::ostream& operator<<(std::ostream& o, const StringPiece& piece) {
				  const char* bytes = piece.data();
				  o.write(bytes, piece.size());
				  return o;
				}

				// Returns true when x and y have the same length and the same bytes.
				bool StringPiece::_equal(const StringPiece& x, const StringPiece& y) {
				  const int n = x.size();
				  if (n != y.size())
				    return false;
				  if (n <= 0)
				    return true;  // nothing to compare

				  const char* a = x.data();
				  const char* b = y.data();

				  // Check the last byte first: strings that share a long common
				  // prefix are rejected without scanning it.
				  if (a[n-1] != b[n-1])
				    return false;

				  for (int i = 0; i < n; i++) {
				    if (a[i] != b[i])
				      return false;
				  }
				  return true;
				}

				// Replaces the contents of *target with the bytes referenced by this piece.
				void StringPiece::CopyToString(string* target) const {

				  target->assign(ptr_, length_);

				}

				// Appends the bytes referenced by this piece to the end of *target.
				void StringPiece::AppendToString(string* target) const {

				  target->append(ptr_, length_);

				}

				// Copies up to n bytes, starting at offset pos, into buf and returns the
				// number of bytes copied.  buf must have room for n bytes.
				// A pos at or past the end copies nothing and returns 0.
				StringPiece::size_type StringPiece::copy(char* buf, size_type n,
				                                         size_type pos) const {
				  // Without this guard, length_ - pos wraps around to a huge unsigned
				  // value when pos > length_, and memcpy would read far past the data.
				  if (length_ <= 0 || pos >= static_cast<size_type>(length_))
				    return 0;

				  size_type ret = min(static_cast<size_type>(length_) - pos, n);
				  // Skip the call for zero bytes so memcpy never sees an unusable buf.
				  if (ret > 0)
				    memcpy(buf, ptr_ + pos, ret);
				  return ret;
				}

				// Returns true when s occurs anywhere inside this piece.
				bool StringPiece::contains(StringPiece s) const {
				  const size_type where = find(s, 0);
				  return where != npos;
				}

				// Returns the offset of the first occurrence of s at or after pos,
				// or npos when there is none (or pos lies past the end).
				StringPiece::size_type StringPiece::find(const StringPiece& s,
				                                         size_type pos) const {
				  if (length_ < 0)
				    return npos;
				  const size_type len = static_cast<size_type>(length_);
				  if (pos > len)
				    return npos;

				  const char* const end = ptr_ + length_;
				  const char* hit = std::search(ptr_ + pos, end,
				                                s.ptr_, s.ptr_ + s.length_);
				  const size_type xpos = hit - ptr_;
				  // std::search returns end on failure; then s cannot fit at xpos.
				  if (xpos + s.length_ > len)
				    return npos;
				  return xpos;
				}

				// Returns the offset of the first byte equal to c at or after pos,
				// or npos when there is none.
				StringPiece::size_type StringPiece::find(char c, size_type pos) const {
				  if (length_ <= 0)
				    return npos;
				  if (pos >= static_cast<size_type>(length_))
				    return npos;

				  const char* const end = ptr_ + length_;
				  const char* hit = std::find(ptr_ + pos, end, c);
				  if (hit == end)
				    return npos;
				  return hit - ptr_;
				}

				// Returns the offset of the last occurrence of s that starts at or
				// before pos, or npos when there is none.
				StringPiece::size_type StringPiece::rfind(const StringPiece& s,
				                                          size_type pos) const {
				  if (length_ < s.length_)
				    return npos;

				  const size_type self_len = length_;
				  // An empty needle matches at pos, clamped to the end of the piece.
				  if (s.length_ == 0)
				    return min(self_len, pos);

				  // A match may start no later than latest_start, so the searched
				  // window ends s.length_ bytes after that.
				  const size_type latest_start = min(self_len - s.length_, pos);
				  const char* const limit = ptr_ + latest_start + s.length_;
				  const char* hit = std::find_end(ptr_, limit, s.ptr_, s.ptr_ + s.length_);
				  if (hit == limit)
				    return npos;
				  return hit - ptr_;
				}

				// Returns the offset of the last byte equal to c at or before pos,
				// or npos when there is none.
				StringPiece::size_type StringPiece::rfind(char c, size_type pos) const {
				  if (length_ <= 0)
				    return npos;

				  // Start from pos, clamped to the last valid index, and walk backwards.
				  const size_type last_index = static_cast<size_type>(length_ - 1);
				  int i = static_cast<int>(min(pos, last_index));
				  while (i >= 0) {
				    if (ptr_[i] == c)
				      return i;
				    --i;
				  }
				  return npos;
				}

				// Returns the piece covering at most n bytes starting at pos.  Both pos
				// and n are clamped so the result never extends past the end.
				StringPiece StringPiece::substr(size_type pos, size_type n) const {
				  const size_type len = static_cast<size_type>(length_);
				  if (pos > len)
				    pos = len;

				  const size_type room = length_ - pos;
				  if (n > room)
				    n = room;

				  return StringPiece(ptr_ + pos, static_cast<int>(n));
				}

				// Sentinel (the largest size_type value) that the find/rfind family
				// returns when nothing matches.
				const StringPiece::size_type StringPiece::npos = size_type(-1);

									
										185

third_party/re2/re2/stringpiece.h
									
										vendored
									
				@ -1,185 +0,0 @@

				// Copyright 2001-2010 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// A string-like object that points to a sized piece of memory.

				//

				// Functions or methods may use const StringPiece& parameters to accept either

				// a "const char*" or a "string" value that will be implicitly converted to

				// a StringPiece.  The implicit conversion means that it is often appropriate

				// to include this .h file in other files rather than forward-declaring

				// StringPiece as would be appropriate for most other Google classes.

				//

				// Systematic usage of StringPiece is encouraged as it will reduce unnecessary

				// conversions from "const char*" to "string" and back again.

				//

				//

				// Arghh!  I wish C++ literals were "string".

				#ifndef STRINGS_STRINGPIECE_H__

				#define STRINGS_STRINGPIECE_H__

				#include <string.h>

				#include <algorithm>

				#include <cstddef>

				#include <iosfwd>

				#include <string>

				namespace re2 {

				// A non-owning view of a sized run of bytes.  The pointer is stored as
				// given; nothing is copied, so the caller keeps the bytes alive.
				class StringPiece {
				 private:
				  const char*   ptr_;     // first byte of the view; NULL for an empty default
				  int           length_;  // number of bytes in the view

				 public:
				  // We provide non-explicit singleton constructors so users can pass
				  // in a "const char*" or a "string" wherever a "StringPiece" is
				  // expected.
				  StringPiece() : ptr_(NULL), length_(0) { }
				  StringPiece(const char* str)
				    : ptr_(str), length_((str == NULL) ? 0 : static_cast<int>(strlen(str))) { }
				  StringPiece(const std::string& str)
				    : ptr_(str.data()), length_(static_cast<int>(str.size())) { }
				  StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { }

				  // data() may return a pointer to a buffer with embedded NULs, and the
				  // returned buffer may or may not be null terminated.  Therefore it is
				  // typically a mistake to pass data() to a routine that expects a NUL
				  // terminated string.
				  const char* data() const { return ptr_; }
				  int size() const { return length_; }
				  int length() const { return length_; }
				  bool empty() const { return length_ == 0; }

				  // Resets to the empty view.
				  void clear() { ptr_ = NULL; length_ = 0; }

				  // Re-points the view at len bytes starting at data.
				  void set(const char* data, int len) { ptr_ = data; length_ = len; }

				  // Re-points the view at the NUL-terminated string str (NULL gives
				  // a view of length 0).
				  void set(const char* str) {
				    ptr_ = str;
				    if (str != NULL)
				      length_ = static_cast<int>(strlen(str));
				    else
				      length_ = 0;
				  }

				  // Same as set(const char*, int) for untyped memory.
				  void set(const void* data, int len) {
				    ptr_ = reinterpret_cast<const char*>(data);
				    length_ = len;
				  }

				  // Unchecked element access.
				  char operator[](int i) const { return ptr_[i]; }

				  // Drops the first n bytes from the view.  n is not range-checked.
				  void remove_prefix(int n) {
				    ptr_ += n;
				    length_ -= n;
				  }

				  // Drops the last n bytes from the view.  n is not range-checked.
				  void remove_suffix(int n) {
				    length_ -= n;
				  }

				  // Three-way bytewise comparison: negative, zero or positive as *this
				  // sorts before, equal to or after x.  A shorter piece that is a
				  // prefix of the other sorts first.
				  int compare(const StringPiece& x) const {
				    // memcmp requires valid pointers even for a zero count, and an
				    // empty piece may hold NULL, so skip the call when there is
				    // nothing to compare.
				    const int common = std::min(length_, x.length_);
				    int r = (common > 0) ? memcmp(ptr_, x.ptr_, common) : 0;
				    if (r == 0) {
				      if (length_ < x.length_) r = -1;
				      else if (length_ > x.length_) r = +1;
				    }
				    return r;
				  }

				  std::string as_string() const {
				    return std::string(data(), size());
				  }
				  // We also define ToString() here, since many other string-like
				  // interfaces name the routine that converts to a C++ string
				  // "ToString", and it's confusing to have the method that does that
				  // for a StringPiece be called "as_string()".  We also leave the
				  // "as_string()" method defined here for existing code.
				  std::string ToString() const {
				    return std::string(data(), size());
				  }

				  void CopyToString(std::string* target) const;
				  void AppendToString(std::string* target) const;

				  // Does "this" start with "x"
				  bool starts_with(const StringPiece& x) const {
				    // An empty x is a prefix of anything; do not hand memcmp a
				    // possibly-NULL pointer in that case.
				    return ((length_ >= x.length_) &&
				            (x.length_ <= 0 ||
				             memcmp(ptr_, x.ptr_, x.length_) == 0));
				  }

				  // Does "this" end with "x"
				  bool ends_with(const StringPiece& x) const {
				    // Same zero-length guard as starts_with.
				    return ((length_ >= x.length_) &&
				            (x.length_ <= 0 ||
				             memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0));
				  }

				  // standard STL container boilerplate
				  typedef char value_type;
				  typedef const char* pointer;
				  typedef const char& reference;
				  typedef const char& const_reference;
				  typedef size_t size_type;
				  typedef ptrdiff_t difference_type;
				  static const size_type npos;
				  typedef const char* const_iterator;
				  typedef const char* iterator;
				  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
				  typedef std::reverse_iterator<iterator> reverse_iterator;
				  iterator begin() const { return ptr_; }
				  iterator end() const { return ptr_ + length_; }
				  const_reverse_iterator rbegin() const {
				    return const_reverse_iterator(ptr_ + length_);
				  }
				  const_reverse_iterator rend() const {
				    return const_reverse_iterator(ptr_);
				  }
				  // STL says return size_type, but Google says return int
				  int max_size() const { return length_; }
				  int capacity() const { return length_; }

				  // Copies up to n bytes starting at pos into buf; returns the count.
				  size_type copy(char* buf, size_type n, size_type pos = 0) const;

				  // Substring search.  The find/rfind functions return npos on failure.
				  bool contains(StringPiece s) const;

				  size_type find(const StringPiece& s, size_type pos = 0) const;
				  size_type find(char c, size_type pos = 0) const;
				  size_type rfind(const StringPiece& s, size_type pos = npos) const;
				  size_type rfind(char c, size_type pos = npos) const;

				  // Returns the sub-view of at most n bytes starting at pos.
				  StringPiece substr(size_type pos, size_type n = npos) const;

				  // Bytewise equality; backs operator==.
				  static bool _equal(const StringPiece&, const StringPiece&);
				};

				// Two pieces are equal when they have the same length and the same bytes.
				inline bool operator==(const StringPiece& x, const StringPiece& y) {

				  return StringPiece::_equal(x, y);

				}

				// Negation of operator==.
				inline bool operator!=(const StringPiece& x, const StringPiece& y) {

				  return !(x == y);

				}

				// Bytewise lexicographic ordering; when one piece is a prefix of the
				// other, the shorter one sorts first.
				inline bool operator<(const StringPiece& x, const StringPiece& y) {
				  // memcmp requires valid pointers even for a zero count, and an empty
				  // piece may hold NULL, so only call it when there are bytes to compare.
				  const int common = std::min(x.size(), y.size());
				  const int r = (common > 0) ? memcmp(x.data(), y.data(), common) : 0;
				  return ((r < 0) || ((r == 0) && (x.size() < y.size())));
				}

				// Defined in terms of operator< with the operands swapped.
				inline bool operator>(const StringPiece& x, const StringPiece& y) {

				  return y < x;

				}

				// Negation of operator>.
				inline bool operator<=(const StringPiece& x, const StringPiece& y) {

				  return !(x > y);

				}

				// Negation of operator<.
				inline bool operator>=(const StringPiece& x, const StringPiece& y) {

				  return !(x < y);

				}

				}  // namespace re2

				// allow StringPiece to be logged

				extern std::ostream& operator<<(std::ostream& o, const re2::StringPiece& piece);

				#endif  // STRINGS_STRINGPIECE_H__

									
										255

third_party/re2/re2/testing/backtrack.cc
									
										vendored
									
				@ -1,255 +0,0 @@

				// Copyright 2008 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Tested by search_test.cc, exhaustive_test.cc, tester.cc

				//

				// Prog::UnsafeSearchBacktrack is a backtracking regular expression search,

				// except that it remembers where it has been, trading a lot of

				// memory for a lot of time. It exists only for testing purposes.

				//

				// Let me repeat that.

				//

				// THIS CODE SHOULD NEVER BE USED IN PRODUCTION:

				//   - It uses a ton of memory.

				//   - It uses a ton of stack.

				//   - It uses CHECK and LOG(FATAL).

				//   - It implements unanchored search by repeated anchored search.

				//

				// On the other hand, it is very simple and a good reference

				// implementation for the more complicated regexp packages.

				//

				// In BUILD, this file is linked into the ":testing" library,

				// not the main library, in order to make it harder to pick up

				// accidentally.

				#include "util/util.h"

				#include "re2/prog.h"

				#include "re2/regexp.h"

				namespace re2 {

				// Backtracker holds the state for a backtracking search.

				//

				// Excluding the search parameters, the main search state

				// is just the "capture registers", which record, for the

				// current execution, the string position at which each

				// parenthesis was passed.  cap_[0] and cap_[1] are the

				// left and right parenthesis in $0, cap_[2] and cap_[3] in $1, etc.

				//

				// To avoid infinite loops during backtracking on expressions

				// like (a*)*, the visited_[] bitmap marks the (state, string-position)

				// pairs that have already been explored and are thus not worth

				// re-exploring if we get there via another path.  Modern backtracking

				// libraries engineer their program representation differently, to make

				// such infinite loops possible to avoid without keeping a giant visited_

				// bitmap, but visited_ works fine for a reference implementation

				// and it has the nice benefit of making the search run in linear time.

				// Holds the parameters and mutable state of one backtracking search
				// over a compiled Prog.  Search() may be called repeatedly; it resets
				// the per-search state and reallocates the visited_ bitmap each time.
				class Backtracker {

				 public:

				  explicit Backtracker(Prog* prog);

				  ~Backtracker();

				  // Searches text (which lies within context) for a match of the
				  // program and returns whether one was found.  On a match the first
				  // nsubmatch entries of submatch are filled in.
				  bool Search(const StringPiece& text, const StringPiece& context,

				              bool anchored, bool longest,

				              StringPiece* submatch, int nsubmatch);

				 private:

				  // Explores from instruction id at string position p looking for a match.

				  // Returns true if found (so that caller can stop trying other possibilities).

				  bool Visit(int id, const char* p);

				  // Search parameters

				  Prog* prog_;              // program being run

				  StringPiece text_;        // text being searched

				  StringPiece context_;     // greater context of text being searched

				  bool anchored_;           // whether search is anchored at text.begin()

				  bool longest_;            // whether search wants leftmost-longest match

				  bool endmatch_;           // whether search must end at text.end()

				  StringPiece *submatch_;   // submatches to fill in

				  int nsubmatch_;           //   # of submatches to fill in

				  // Search state

				  const char* cap_[64];     // capture registers

				  uint32 *visited_;         // bitmap: (Inst*, char*) pairs already backtracked

				  size_t nvisited_;         //   # of words in bitmap

				};

				// Binds the backtracker to prog and puts the remaining search parameters
				// into an inert state.  cap_ is not initialized here; Search() clears it
				// (and assigns text_ and context_) at the start of every search.
				Backtracker::Backtracker(Prog* prog)

				  : prog_(prog),

				    anchored_(false),

				    longest_(false),

				    endmatch_(false),

				    submatch_(NULL),

				    nsubmatch_(0),

				    visited_(NULL),

				    nvisited_(0) {

				}

				// Releases the visited_ bitmap allocated by the most recent Search()
				// (visited_ is NULL if Search() was never called).
				Backtracker::~Backtracker() {

				  delete[] visited_;

				}

				// Runs a backtracking search.

				bool Backtracker::Search(const StringPiece& text, const StringPiece& context,

				                         bool anchored, bool longest,

				                         StringPiece* submatch, int nsubmatch) {

				  text_ = text;

				  context_ = context;

				  if (context_.begin() == NULL)

				    context_ = text;

				  if (prog_->anchor_start() && text.begin() > context_.begin())

				    return false;

				  if (prog_->anchor_end() && text.end() < context_.end())

				    return false;

				  anchored_ = anchored | prog_->anchor_start();

				  longest_ = longest | prog_->anchor_end();

				  endmatch_ = prog_->anchor_end();

				  submatch_ = submatch;

				  nsubmatch_ = nsubmatch;

				  CHECK(2*nsubmatch_ < arraysize(cap_));

				  memset(cap_, 0, sizeof cap_);

				  // We use submatch_[0] for our own bookkeeping,

				  // so it had better exist.

				  StringPiece sp0;

				  if (nsubmatch < 1) {

				    submatch_ = &sp0;

				    nsubmatch_ = 1;

				  }

				  submatch_[0] = NULL;

				  // Allocate new visited_ bitmap -- size is proportional

				  // to text, so have to reallocate on each call to Search.

				  delete[] visited_;

				  nvisited_ = (prog_->size()*(text.size()+1) + 31)/32;

				  visited_ = new uint32[nvisited_];

				  memset(visited_, 0, nvisited_*sizeof visited_[0]);

				  // Anchored search must start at text.begin().

				  if (anchored_) {

				    cap_[0] = text.begin();

				    return Visit(prog_->start(), text.begin());

				  }

				  // Unanchored search, starting from each possible text position.

				  // Notice that we have to try the empty string at the end of

				  // the text, so the loop condition is p <= text.end(), not p < text.end().

				  for (const char* p = text.begin(); p <= text.end(); p++) {

				    cap_[0] = p;

				    if (Visit(prog_->start(), p))  // Match must be leftmost; done.

				      return true;

				  }

				  return false;

				}

				// Explores from instruction id at string position p looking for a match.
				// Returns true if found (so that caller can stop trying other possibilities).
				bool Backtracker::Visit(int id, const char* p) {
				  // Check bitmap.  If we've already explored from here,
				  // either it didn't match or it did but we're hoping for a better match.
				  // Either way, don't go down that road again.
				  CHECK(p <= text_.end());
				  // Widen before multiplying so the bit index is computed in size_t
				  // rather than overflowing int for large programs or texts.
				  const size_t n = static_cast<size_t>(id)*(text_.size()+1) +
				                   (p - text_.begin());
				  CHECK_LT(n/32, nvisited_);
				  // Use an unsigned 1: shifting a signed 1 by 31 would move it into
				  // the sign bit.
				  const unsigned int bit = 1U << (n&31);
				  if (visited_[n/32] & bit)
				    return false;
				  visited_[n/32] |= bit;

				  // Pick out byte at current position.  If at end of string,
				  // have to explore in hope of finishing a match.  Use impossible byte -1.
				  int c = -1;
				  if (p < text_.end())
				    c = *p & 0xFF;

				  Prog::Inst* ip = prog_->inst(id);
				  switch (ip->opcode()) {
				    default:
				      LOG(FATAL) << "Unexpected opcode: " << static_cast<int>(ip->opcode());
				      return false;  // not reached

				    case kInstAlt:
				    case kInstAltMatch:
				      // Try both possible next states: out is preferred to out1.
				      if (Visit(ip->out(), p)) {
				        // A longest-match search still explores the other branch,
				        // since it may record a longer match.
				        if (longest_)
				          Visit(ip->out1(), p);
				        return true;
				      }
				      return Visit(ip->out1(), p);

				    case kInstByteRange:
				      if (ip->Matches(c))
				        return Visit(ip->out(), p+1);
				      return false;

				    case kInstCapture:
				      if (0 <= ip->cap() && ip->cap() < arraysize(cap_)) {
				        // Capture p to register, but save old value.
				        const char* q = cap_[ip->cap()];
				        cap_[ip->cap()] = p;
				        bool ret = Visit(ip->out(), p);
				        // Restore old value as we backtrack.
				        cap_[ip->cap()] = q;
				        return ret;
				      }
				      // Register index out of range: continue without recording it.
				      return Visit(ip->out(), p);

				    case kInstEmptyWidth:
				      // Fail if any required empty-width flag does not hold at p.
				      if (ip->empty() & ~Prog::EmptyFlags(context_, p))
				        return false;
				      return Visit(ip->out(), p);

				    case kInstNop:
				      return Visit(ip->out(), p);

				    case kInstMatch:
				      // We found a match.  If it's the best so far, record the
				      // parameters in the caller's submatch_ array.
				      if (endmatch_ && p != context_.end())
				        return false;
				      cap_[1] = p;
				      if (submatch_[0].data() == NULL ||           // First match so far ...
				          (longest_ && p > submatch_[0].end())) {  // ... or better match
				        for (int i = 0; i < nsubmatch_; i++)
				          submatch_[i].set(cap_[2*i],
				                           static_cast<int>(cap_[2*i+1] - cap_[2*i]));
				      }
				      return true;

				    case kInstFail:
				      return false;
				  }
				}

				// Runs a backtracking search.

				bool Prog::UnsafeSearchBacktrack(const StringPiece& text,

				                                 const StringPiece& context,

				                                 Anchor anchor,

				                                 MatchKind kind,

				                                 StringPiece* match,

				                                 int nmatch) {

				  // If full match, we ask for an anchored longest match

				  // and then check that match[0] == text.

				  // So make sure match[0] exists.

				  StringPiece sp0;

				  if (kind == kFullMatch) {

				    anchor = kAnchored;

				    if (nmatch < 1) {

				      match = &sp0;

				      nmatch = 1;

				    }

				  }

				  // Run the search.

				  Backtracker b(this);

				  bool anchored = anchor == kAnchored;

				  bool longest = kind != kFirstMatch;

				  if (!b.Search(text, context, anchored, longest, match, nmatch))

				    return false;

				  if (kind == kFullMatch && match[0].end() != text.end())

				    return false;

				  return true;

				}

				}  // namespace re2

									
										223

third_party/re2/re2/testing/charclass_test.cc
									
										vendored
									
				@ -1,223 +0,0 @@

				// Copyright 2006 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Test character class manipulations.

				#include "util/test.h"

				#include "re2/regexp.h"

				namespace re2 {

				// One table-driven test case for the character class code.
				struct CCTest {

				  // Ranges to add, in order; an entry with lo < 0 ends the list.
				  struct {

				    Rune lo;

				    Rune hi;

				  } add[10];

				  // If >= 0, RemoveAbove(remove) is applied after the adds.
				  int remove;

				  // Ranges expected in the resulting class, in iteration order;
				  // an entry with lo < 0 ends the list.
				  struct {

				    Rune lo;

				    Rune hi;

				  } final[10];

				};

				// Test table.  Each entry lists the ranges to add (ended by {-1}), the
				// RemoveAbove argument (-1 for none), and the expected final ranges
				// (ended by {-1}).
				static CCTest tests[] = {

				  // Adding ranges: adjacent, overlapping and out-of-order input.
				  { { { 10, 20 }, {-1} }, -1,

				    { { 10, 20 }, {-1} } },

				  { { { 10, 20 }, { 20, 30 }, {-1} }, -1,

				    { { 10, 30 }, {-1} } },

				  { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1,

				    { { 10, 40 }, {-1} } },

				  { { { 0, 50 }, { 20, 30 }, {-1} }, -1,

				    { { 0, 50 }, {-1} } },

				  { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1,

				    { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },

				  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,

				    { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },

				  // NOTE(review): this entry is identical to the previous one.
				  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,

				    { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },

				  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1,

				    { { 5, 25 }, {-1} } },

				  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1,

				    { { 10, 23 }, {-1} } },

				  // These check boundary cases during negation.

				  { { { 0, Runemax }, {-1} }, -1,

				    { { 0, Runemax }, {-1} } },

				  { { { 0, 50 }, {-1} }, -1,

				    { { 0, 50 }, {-1} } },

				  { { { 50, Runemax }, {-1} }, -1,

				    { { 50, Runemax }, {-1} } },

				  // Check RemoveAbove.

				  { { { 50, Runemax }, {-1} }, 255,

				    { { 50, 255 }, {-1} } },

				  { { { 50, Runemax }, {-1} }, 65535,

				    { { 50, 65535 }, {-1} } },

				  { { { 50, Runemax }, {-1} }, Runemax,

				    { { 50, Runemax }, {-1} } },

				  { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255,

				    { { 50, 60 }, { 250, 255 }, {-1} } },

				  { { { 50, 60 }, {-1} }, 255,

				    { { 50, 60 }, {-1} } },

				  { { { 350, 360 }, {-1} }, 255,

				    { {-1} } },

				  { { {-1} }, 255,

				    { {-1} } },

				};

				// Prints a failure report.  With a test case t, prints what was added,
				// what was removed and the expected ranges, then the ranges actually in
				// cc.  With t == NULL, prints only desc followed by the ranges in cc.
				template<class CharClass>
				static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
				  if (t != NULL) {
				    printf("\n");
				    printf("CharClass added: [%s]", desc);
				    for (int i = 0; t->add[i].lo >= 0; i++)
				      printf(" %d-%d", t->add[i].lo, t->add[i].hi);
				    printf("\n");

				    if (t->remove >= 0)
				      printf("Removed > %d\n", t->remove);

				    printf("\twant:");
				    for (int i = 0; t->final[i].lo >= 0; i++)
				      printf(" %d-%d", t->final[i].lo, t->final[i].hi);
				    printf("\n");

				    printf("\thave:");
				  } else {
				    printf("\t%s:", desc);
				  }

				  typename CharClass::iterator range = cc->begin();
				  while (range != cc->end()) {
				    printf(" %d-%d", range->lo, range->hi);
				    ++range;
				  }
				  printf("\n");
				}

				// Returns whether x falls inside one of the expected final ranges of t.
				bool ShouldContain(CCTest *t, int x) {
				  int k = 0;
				  while (t->final[k].lo >= 0) {
				    const bool inside = t->final[k].lo <= x && x <= t->final[k].hi;
				    if (inside)
				      return true;
				    k++;
				  }
				  return false;
				}

				// Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder.

				// CharClass flavor: returns the result of cc->Negate(); cc itself is
				// still used by the caller afterwards.
				CharClass* Negate(CharClass *cc) {

				  return cc->Negate();

				}

				// CharClass flavor: a CharClass is released through its own Delete().
				void Delete(CharClass* cc) {

				  cc->Delete();

				}

				// CharClassBuilder flavor: the builder negates in place, so negate a
				// copy and return that, leaving cc untouched.
				CharClassBuilder* Negate(CharClassBuilder* cc) {
				  CharClassBuilder* inverted = cc->Copy();
				  inverted->Negate();
				  return inverted;
				}

				// CharClassBuilder flavor: a builder is released with plain delete.
				void Delete(CharClassBuilder* cc) {

				  delete cc;

				}

				// Checks that cc holds exactly the final ranges of t: same ranges in the
				// same order, matching size(), matching Contains() for 0..99 and Runemax,
				// and a negation that contains exactly the complement.  Prints a report
				// and returns false on the first discrepancy.
				template<class CharClass>
				bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
				  // Walk the expected ranges and the actual ranges in lock step.
				  typename CharClass::iterator it = cc->begin();
				  int want_size = 0;
				  for (int k = 0; t->final[k].lo >= 0; k++, ++it) {
				    const bool mismatch = it == cc->end() ||
				                          it->lo != t->final[k].lo ||
				                          it->hi != t->final[k].hi;
				    if (mismatch) {
				      Broke(desc, t, cc);
				      return false;
				    }
				    want_size += it->hi - it->lo + 1;
				  }

				  // cc must not have ranges beyond the expected ones.
				  if (it != cc->end()) {
				    Broke(desc, t, cc);
				    return false;
				  }

				  if (cc->size() != want_size) {
				    Broke(desc, t, cc);
				    printf("wrong size: want %d have %d\n", want_size, cc->size());
				    return false;
				  }

				  // Probe membership for 0..99 and, as the final probe, Runemax.
				  for (int step = 0; step <= 100; step++) {
				    const int probe = (step == 100) ? Runemax : step;
				    if (ShouldContain(t, probe) != cc->Contains(probe)) {
				      Broke(desc, t, cc);
				      printf("want contains(%d)=%d, got %d\n",
				             probe, ShouldContain(t, probe), cc->Contains(probe));
				      return false;
				    }
				  }

				  // The negation must contain exactly what cc does not.
				  CharClass* ncc = Negate(cc);
				  for (int step = 0; step <= 100; step++) {
				    const int probe = (step == 100) ? Runemax : step;
				    if (ShouldContain(t, probe) == ncc->Contains(probe)) {
				      Broke(desc, t, cc);
				      Broke("ncc", NULL, ncc);
				      printf("want ncc contains(%d)!=%d, got %d\n",
				             probe, ShouldContain(t, probe), ncc->Contains(probe));
				      Delete(ncc);
				      return false;
				    }
				    if (ncc->size() != Runemax+1 - cc->size()) {
				      Broke(desc, t, cc);
				      Broke("ncc", NULL, ncc);
				      printf("ncc size should be %d is %d\n",
				             Runemax+1 - cc->size(), ncc->size());
				      Delete(ncc);
				      return false;
				    }
				  }

				  Delete(ncc);
				  return true;
				}

				// Runs every entry of tests[] through a CharClassBuilder and checks the
				// builder, the CharClass made from it, and a copy of the builder.
				TEST(TestCharClassBuilder, Adds) {
				  int nfail = 0;
				  for (int i = 0; i < arraysize(tests); i++) {
				    CCTest* tc = &tests[i];

				    // Build the class described by the test case.
				    CharClassBuilder ccb;
				    for (int k = 0; tc->add[k].lo >= 0; k++)
				      ccb.AddRange(tc->add[k].lo, tc->add[k].hi);
				    if (tc->remove >= 0)
				      ccb.RemoveAbove(tc->remove);

				    if (!CorrectCC(&ccb, tc, "before copy (CharClassBuilder)"))
				      nfail++;

				    CharClass* frozen = ccb.GetCharClass();
				    if (!CorrectCC(frozen, tc, "before copy (CharClass)"))
				      nfail++;
				    frozen->Delete();

				    CharClassBuilder *duplicate = ccb.Copy();
				    if (!CorrectCC(duplicate, tc, "after copy (CharClassBuilder)"))
				      nfail++;

				    // NOTE(review): this re-reads the original builder (ccb), not the
				    // copy; confirm whether that is the intended "after copy" check.
				    frozen = ccb.GetCharClass();
				    if (!CorrectCC(frozen, tc, "after copy (CharClass)"))
				      nfail++;
				    frozen->Delete();

				    delete duplicate;
				  }
				  EXPECT_EQ(nfail, 0);
				}

				}  // namespace re2

									
										175

third_party/re2/re2/testing/compile_test.cc
									
										vendored
									
				@ -1,175 +0,0 @@

				// Copyright 2007 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Test prog.cc, compile.cc

				#include <string>

				#include <vector>

				#include "util/test.h"

				#include "re2/regexp.h"

				#include "re2/prog.h"

				// Command-line flag naming a regular expression to compile and dump.
				// NOTE(review): nothing in this file reads FLAGS_show -- confirm whether
				// the flag is still needed.
				DEFINE_string(show, "", "regular expression to compile and dump");

				namespace re2 {

				// Simple input/output tests checking that

				// the regexp compiles to the expected code.

				// These are just to sanity check the basic implementation.

				// The real confidence tests happen by testing the NFA/DFA

				// that run the compiled code.

				// One compile expectation: a pattern and the program text it must produce.
				struct Test {

				  // Pattern handed to Regexp::Parse.
				  const char* regexp;

				  // Exact text that Prog::Dump() must return for the compiled pattern.
				  const char* code;

				};

				// Expectation table for TestRegexpCompileToProg.Simple.  Every pattern is
				// parsed with Regexp::PerlX|Regexp::Latin1 and the dump of the compiled
				// program is compared, byte for byte, against the second field.
				static Test tests[] = {

				  { "a",

				    "1. byte [61-61] -> 2\n"

				    "2. match! 0\n" },

				  { "ab",

				    "1. byte [61-61] -> 2\n"

				    "2. byte [62-62] -> 3\n"

				    "3. match! 0\n" },

				  { "a|c",

				    "3. alt -> 1 | 2\n"

				    "1. byte [61-61] -> 4\n"

				    "2. byte [63-63] -> 4\n"

				    "4. match! 0\n" },

				  // Adjacent single-character alternatives are expected to collapse into
				  // one byte range, exactly like the character class [ab] below.
				  { "a|b",

				    "1. byte [61-62] -> 2\n"

				    "2. match! 0\n" },

				  { "[ab]",

				    "1. byte [61-62] -> 2\n"

				    "2. match! 0\n" },

				  // Greedy and non-greedy forms differ only in the order of the alt's
				  // two targets.
				  { "a+",

				    "1. byte [61-61] -> 2\n"

				    "2. alt -> 1 | 3\n"

				    "3. match! 0\n" },

				  { "a+?",

				    "1. byte [61-61] -> 2\n"

				    "2. alt -> 3 | 1\n"

				    "3. match! 0\n" },

				  { "a*",

				    "2. alt -> 1 | 3\n"

				    "1. byte [61-61] -> 2\n"

				    "3. match! 0\n" },

				  { "a*?",

				    "2. alt -> 3 | 1\n"

				    "3. match! 0\n"

				    "1. byte [61-61] -> 2\n" },

				  { "a?",

				    "2. alt -> 1 | 3\n"

				    "1. byte [61-61] -> 3\n"

				    "3. match! 0\n" },

				  { "a??",

				    "2. alt -> 3 | 1\n"

				    "3. match! 0\n"

				    "1. byte [61-61] -> 3\n" },

				  { "a{4}",

				    "1. byte [61-61] -> 2\n"

				    "2. byte [61-61] -> 3\n"

				    "3. byte [61-61] -> 4\n"

				    "4. byte [61-61] -> 5\n"

				    "5. match! 0\n" },

				  { "(a)",

				    "2. capture 2 -> 1\n"

				    "1. byte [61-61] -> 3\n"

				    "3. capture 3 -> 4\n"

				    "4. match! 0\n" },

				  // A non-capturing group is expected to add no instructions.
				  { "(?:a)",

				    "1. byte [61-61] -> 2\n"

				    "2. match! 0\n" },

				  { "",

				    "2. match! 0\n" },

				  // Dot is expected to exclude byte 0x0a (newline) only.
				  { ".",

				    "3. alt -> 1 | 2\n"

				    "1. byte [00-09] -> 4\n"

				    "2. byte [0b-ff] -> 4\n"

				    "4. match! 0\n" },

				  { "[^ab]",

				    "5. alt -> 3 | 4\n"

				    "3. alt -> 1 | 2\n"

				    "4. byte [63-ff] -> 6\n"

				    "1. byte [00-09] -> 6\n"

				    "2. byte [0b-60] -> 6\n"

				    "6. match! 0\n" },

				  // Both cases are expected to be covered by a single case-folding
				  // (byte/i) instruction.
				  { "[Aa]",

				    "1. byte/i [61-61] -> 2\n"

				    "2. match! 0\n" },

				  // Issue 20992936

				  { "[[-`]",

				    "1. byte [5b-60] -> 2\n"

				    "2. match! 0\n" },

				};

				// Parses and compiles every pattern in tests[] and compares the dumped
				// program with the expected text.  Parse failures, compile failures and
				// mismatching dumps are logged and counted; the count must end at zero.
				TEST(TestRegexpCompileToProg, Simple) {
				  int failed = 0;
				  const int ntests = arraysize(tests);
				  for (int idx = 0; idx < ntests; ++idx) {
				    const re2::Test& t = tests[idx];
				    Regexp* re = Regexp::Parse(t.regexp, Regexp::PerlX|Regexp::Latin1, NULL);
				    if (re == NULL) {
				      LOG(ERROR) << "Cannot parse: " << t.regexp;
				      failed++;
				      continue;
				    }

				    Prog* prog = re->CompileToProg(0);
				    if (prog != NULL) {
				      // A limit of 1 is expected to make compilation fail.
				      CHECK(re->CompileToProg(1) == NULL);
				      string s = prog->Dump();
				      if (s != t.code) {
				        LOG(ERROR) << "Incorrect compiled code for: " << t.regexp;
				        LOG(ERROR) << "Want:\n" << t.code;
				        LOG(ERROR) << "Got:\n" << s;
				        failed++;
				      }
				      delete prog;
				    } else {
				      LOG(ERROR) << "Cannot compile: " << t.regexp;
				      failed++;
				    }
				    re->Decref();
				  }
				  EXPECT_EQ(failed, 0);
				}

				// The distinct byte ranges involved in the UTF-8 dot ([^\n]).
				// Once, erroneously split between 0x3f and 0x40 because it is
				// a 6-bit boundary.
				//
				// The ranges must tile 0x00-0xFF with no gaps or overlaps.  The table is
				// consumed by walking every byte of every range and comparing the byte's
				// class against the range's index, so a byte that belongs to no range is
				// silently never checked.
				static struct UTF8ByteRange {
				  int lo;  // first byte of the range, inclusive
				  int hi;  // last byte of the range, inclusive
				} utf8ranges[] = {
				  { 0x00, 0x09 },
				  { 0x0A, 0x0A },  // '\n', the byte excluded by [^\n]
				  { 0x0B, 0x7F },  // starts right after '\n' so that 0x0B-0x0F are covered
				  { 0x80, 0x8F },
				  { 0x90, 0x9F },
				  { 0xA0, 0xBF },
				  { 0xC0, 0xC1 },
				  { 0xC2, 0xDF },
				  { 0xE0, 0xE0 },
				  { 0xE1, 0xEF },
				  { 0xF0, 0xF0 },
				  { 0xF1, 0xF3 },
				  { 0xF4, 0xF4 },
				  { 0xF5, 0xFF },
				};

				// Compiles "." (parsed with Regexp::PerlX only) and checks the program's
				// byte map: the number of byte classes must equal the number of entries
				// in utf8ranges, and every byte inside entry i must map to class i.
				TEST(TestCompile, ByteRanges) {
				  Regexp* re = Regexp::Parse(".", Regexp::PerlX, NULL);
				  EXPECT_TRUE(re != NULL);
				  Prog* prog = re->CompileToProg(0);
				  EXPECT_TRUE(prog != NULL);
				  EXPECT_EQ(prog->bytemap_range(), arraysize(utf8ranges));

				  for (int i = 0; i < arraysize(utf8ranges); i++) {
				    const UTF8ByteRange& range = utf8ranges[i];
				    for (int j = range.lo; j <= range.hi; j++) {
				      EXPECT_EQ(prog->bytemap()[j], i) << " byte " << j;
				    }
				  }

				  delete prog;
				  re->Decref();
				}

				}  // namespace re2

									
										348

third_party/re2/re2/testing/dfa_test.cc
									
										vendored
									
				@ -1,348 +0,0 @@

				// Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				#include "util/thread.h"

				#include "util/test.h"

				#include "re2/prog.h"

				#include "re2/re2.h"

				#include "re2/regexp.h"

				#include "re2/testing/regexp_generator.h"

				#include "re2/testing/string_generator.h"

				// Compile-time switch for the heap-usage assertions.  While it is false the
				// SingleThreaded tests below still run but skip their CHECK_GT/CHECK_LT
				// comparisons on the measured usage.
				static const bool UsingMallocCounter = false;

				// Declared here, defined elsewhere.  The SearchDFA tests below set it to
				// false so that the DFA keeps going instead of bailing out early.
				DECLARE_bool(re2_dfa_bail_when_slow);

				// Number of "[ab]" repetitions in the Multithreaded.BuildEntireDFA pattern.
				DEFINE_int32(size, 8, "log2(number of DFA nodes)");

				// How many times each multithreaded scenario is repeated.
				DEFINE_int32(repeat, 2, "Repetition count.");

				// How many helper threads each multithreaded scenario starts.
				DEFINE_int32(threads, 4, "number of threads");

				namespace re2 {

				// Check that multithreaded access to DFA class works.

				// Helper thread: builds entire DFA for prog.

				// Thread whose Run() builds the entire first-match DFA for one Prog and
				// CHECK-fails if BuildEntireDFA reports failure.
				class BuildThread : public Thread {

				 public:

				  // Stores the pointer only; nothing in this class deletes prog.
				  BuildThread(Prog* prog) : prog_(prog) {}

				  virtual void Run() {

				    CHECK(prog_->BuildEntireDFA(Prog::kFirstMatch));

				  }

				 private:

				  // Program whose DFA is built; may be shared by several BuildThreads.
				  Prog* prog_;

				};

				// Builds the whole DFA for "a", FLAGS_size copies of "[ab]", then "b":
				// first from a single helper thread, then FLAGS_repeat times from
				// FLAGS_threads helper threads that all share one Prog.
				TEST(Multithreaded, BuildEntireDFA) {
				  // Create regexp with 2^FLAGS_size states in DFA.
				  string s("a");
				  for (int k = FLAGS_size; k > 0; k--)
				    s.append("[ab]");
				  s.append("b");

				  // Check that single-threaded code works.
				  {
				    Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
				    CHECK(re);
				    Prog* prog = re->CompileToProg(0);
				    CHECK(prog);

				    BuildThread* worker = new BuildThread(prog);
				    worker->SetJoinable(true);
				    worker->Start();
				    worker->Join();
				    delete worker;

				    delete prog;
				    re->Decref();
				  }

				  // Build the DFA simultaneously in a bunch of threads.
				  for (int round = 0; round < FLAGS_repeat; round++) {
				    Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
				    CHECK(re);
				    Prog* prog = re->CompileToProg(0);
				    CHECK(prog);

				    // Create every thread before starting any of them.
				    vector<BuildThread*> threads;
				    while (static_cast<int>(threads.size()) < FLAGS_threads) {
				      BuildThread* worker = new BuildThread(prog);
				      worker->SetJoinable(true);
				      threads.push_back(worker);
				    }
				    for (size_t j = 0; j < threads.size(); j++)
				      threads[j]->Start();
				    for (size_t j = 0; j < threads.size(); j++) {
				      threads[j]->Join();
				      delete threads[j];
				    }

				    // One more compile, to make sure everything is okay.
				    prog->BuildEntireDFA(Prog::kFirstMatch);
				    delete prog;
				    re->Decref();
				  }
				}

				// Check that DFA size requirements are followed.

				// BuildEntireDFA will, like SearchDFA, stop building out

				// the DFA once the memory limits are reached.

				// Compiles one very large pattern under a series of memory limits and
				// builds both the first-match and longest-match DFAs under each limit.
				// The heap-growth comparison only runs when UsingMallocCounter is set.
				TEST(SingleThreaded, BuildEntireDFA) {
				  // Create regexp with 2^30 states in DFA.
				  string s("a");
				  for (int k = 0; k < 30; k++)
				    s.append("[ab]");
				  s.append("b");

				  Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
				  CHECK(re);

				  // Limits run from 2^17 up to 2^23.
				  const int max = 24;
				  for (int i = 17; i < max; i++) {
				    int64 limit = 1<<i;
				    int64 usage;
				    {
				      testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
				      Prog* prog = re->CompileToProg(limit);
				      CHECK(prog);
				      prog->BuildEntireDFA(Prog::kFirstMatch);
				      prog->BuildEntireDFA(Prog::kLongestMatch);
				      usage = m.HeapGrowth();
				      delete prog;
				    }
				    if (UsingMallocCounter) {
				      // Tolerate +/- 10%.
				      CHECK_GT(usage, limit*9/10);
				      CHECK_LT(usage, limit*11/10);
				    }
				  }
				  re->Decref();
				}

				// Returns a string over the binary alphabet {0,1} in which every binary
				// sequence of length n occurs as a contiguous substring.  Concatenating
				// all sequences would take n * 2^n characters; this De Bruijn construction
				// needs only n + 2^n - 1 (n-1 leading zeros plus one character per
				// sequence).  See Knuth, The Art of Computer Programming, Vol 2,
				// Exercise 3.2.2 #17.
				//
				// Such a string is a worst case for a DFA whose state depends on the last
				// n bytes: every position of the input demands a state not seen before,
				// so no state is reused until the end of the string.
				//
				// n must be positive and smaller than the number of bits in an int.
				static string DeBruijnString(int n) {
				  CHECK_LT(n, static_cast<int>(8*sizeof(int)));
				  CHECK_GT(n, 0);

				  const int nseq = 1<<n;
				  const int mask = nseq - 1;

				  // did[w] records whether the n-bit window w has already been produced.
				  vector<bool> did(nseq, false);

				  // Prime the window with n-1 zeros.
				  string s(n-1, '0');

				  int bits = 0;
				  for (int step = 0; step < nseq; step++) {
				    bits = (bits << 1) & mask;
				    // Prefer a 1 whenever that gives a window not produced yet.
				    const bool take_one = !did[bits|1];
				    if (take_one)
				      bits |= 1;
				    s.append(take_one ? "1" : "0");
				    CHECK(!did[bits]);
				    did[bits] = true;
				  }
				  return s;
				}

				// Test that the DFA gets the right result even if it runs

				// out of memory during a search.  The regular expression

				// 0[01]{n}$ matches a binary string of 0s and 1s only if

				// the (n+1)th-to-last character is a 0.  Matching this in

				// a single forward pass (as done by the DFA) requires

				// keeping one bit for each of the last n+1 characters

				// (whether each was a 0), or 2^(n+1) possible states.

				// If we run this regexp to search in a string that contains

				// every possible n-character binary string as a substring,

				// then it will have to run through at least 2^n states.

				// States are big data structures -- certainly more than 1 byte --

				// so if the DFA can search correctly while staying within a

				// 2^n byte limit, it must be handling out-of-memory conditions

				// gracefully.

				// Searches a De Bruijn string (and the same string plus one "0") with
				// 0[01]{n}$ under a 2^n memory limit, ten times over, and requires that
				// the DFA never reports failure and always gives the right match result.
				// The heap-usage comparison only runs when UsingMallocCounter is set.
				TEST(SingleThreaded, SearchDFA) {
				  // Choice of n is mostly arbitrary, except that:
				  //   * making n too big makes the test run for too long.
				  //   * making n too small makes the DFA refuse to run,
				  //     because it has so little memory compared to the program size.
				  // Empirically, n = 18 is a good compromise between the two.
				  const int n = 18;

				  Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
				                             Regexp::LikePerl, NULL);
				  CHECK(re);

				  // The De Bruijn string for n ends with a 1 followed by n 0s in a row,
				  // which is not a match for 0[01]{n}$.  Adding one more 0 is a match.
				  string no_match = DeBruijnString(n);
				  string match = no_match + "0";

				  // The De Bruijn string is the worst case input for this regexp.
				  // By default, the DFA will notice that it is flushing its cache
				  // too frequently and will bail out early, so that RE2 can use the
				  // NFA implementation instead.  (The DFA loses its speed advantage
				  // if it can't get a good cache hit rate.)
				  // Tell the DFA to trudge along instead.
				  FLAGS_re2_dfa_bail_when_slow = false;

				  int64 usage;
				  int64 peak_usage;
				  {
				    testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
				    Prog* prog = re->CompileToProg(1<<n);
				    CHECK(prog);
				    for (int i = 0; i < 10; i++) {
				      bool matched, failed = false;
				      matched = prog->SearchDFA(match, NULL,
				                                Prog::kUnanchored, Prog::kFirstMatch,
				                                NULL, &failed, NULL);
				      CHECK(!failed);
				      CHECK(matched);
				      matched = prog->SearchDFA(no_match, NULL,
				                                Prog::kUnanchored, Prog::kFirstMatch,
				                                NULL, &failed, NULL);
				      CHECK(!failed);
				      CHECK(!matched);
				    }
				    usage = m.HeapGrowth();
				    peak_usage = m.PeakHeapGrowth();
				    delete prog;
				  }

				  // Drop the reference before the early return below.  The release used
				  // to sit at the very end of the test, after the return, so the regexp
				  // leaked whenever UsingMallocCounter was false.  The program compiled
				  // from it has already been deleted at this point.
				  re->Decref();

				  if (!UsingMallocCounter)
				    return;
				  //LOG(INFO) << "usage " << usage << ", "
				  //          << "peak usage " << peak_usage;
				  CHECK_LT(usage, 1<<n);
				  CHECK_LT(peak_usage, 1<<n);
				}

				// Helper thread: searches for match, which should match,

				// and no_match, which should not.

				// Thread that runs two rounds of unanchored first-match DFA searches on a
				// shared Prog: one over a text that must match and one over a text that
				// must not.  Any failure reported by the DFA is fatal.
				class SearchThread : public Thread {
				 public:
				  // The StringPieces are stored as given.
				  // NOTE(review): presumably the referenced text has to outlive the
				  // thread -- confirm against StringPiece's ownership rules.
				  SearchThread(Prog* prog, const StringPiece& match,
				               const StringPiece& no_match)
				    : prog_(prog), match_(match), no_match_(no_match) {}

				  virtual void Run() {
				    for (int round = 0; round < 2; round++) {
				      bool matched = Search(match_);
				      CHECK(matched);
				      matched = Search(no_match_);
				      CHECK(!matched);
				    }
				  }

				 private:
				  // Runs one search over text, insists that the DFA did not fail, and
				  // returns whether a match was found.
				  bool Search(const StringPiece& text) {
				    bool failed = false;
				    bool matched = prog_->SearchDFA(text, NULL,
				                                    Prog::kUnanchored, Prog::kFirstMatch,
				                                    NULL, &failed, NULL);
				    CHECK(!failed);
				    return matched;
				  }

				  Prog* prog_;
				  StringPiece match_;
				  StringPiece no_match_;
				};

				// Multithreaded variant of the De Bruijn search: one SearchThread first,
				// then FLAGS_repeat rounds of FLAGS_threads SearchThreads sharing a Prog
				// that was compiled with a 2^n memory limit.
				TEST(Multithreaded, SearchDFA) {
				  // Same as single-threaded test above.
				  const int n = 18;
				  Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
				                             Regexp::LikePerl, NULL);
				  CHECK(re);
				  string no_match = DeBruijnString(n);
				  string match = no_match + "0";
				  FLAGS_re2_dfa_bail_when_slow = false;

				  // Check that single-threaded code works.
				  {
				    Prog* prog = re->CompileToProg(1<<n);
				    CHECK(prog);

				    SearchThread* worker = new SearchThread(prog, match, no_match);
				    worker->SetJoinable(true);
				    worker->Start();
				    worker->Join();
				    delete worker;

				    delete prog;
				  }

				  // Run the search simultaneously in a bunch of threads.
				  // The repeat and thread counts come from the same flags that
				  // Multithreaded.BuildEntireDFA uses.
				  for (int round = 0; round < FLAGS_repeat; round++) {
				    Prog* prog = re->CompileToProg(1<<n);
				    CHECK(prog);

				    // Create every thread before starting any of them.
				    vector<SearchThread*> threads;
				    while (static_cast<int>(threads.size()) < FLAGS_threads) {
				      SearchThread* worker = new SearchThread(prog, match, no_match);
				      worker->SetJoinable(true);
				      threads.push_back(worker);
				    }
				    for (size_t j = 0; j < threads.size(); j++)
				      threads[j]->Start();
				    for (size_t j = 0; j < threads.size(); j++) {
				      threads[j]->Join();
				      delete threads[j];
				    }
				    delete prog;
				  }
				  re->Decref();
				}

				// One reverse-DFA expectation, consumed by the DFA.ReverseMatch test.
				struct ReverseTest {

				  // Pattern; parsed with Regexp::LikePerl and compiled as a reverse program.
				  const char *regexp;

				  // Text that is searched.
				  const char *text;

				  // Result that SearchDFA is expected to return for this pair.
				  bool match;

				};

				// Test that reverse DFA handles anchored/unanchored correctly.

				// It's in the DFA interface but not used by RE2.

				// Each anchored pattern appears twice: once with a text that is expected
				// to match and once with the reversed text, which is expected not to.
				ReverseTest reverse_tests[] = {

				  { "\\A(a|b)", "abc", true },

				  { "(a|b)\\z", "cba", true },

				  { "\\A(a|b)", "cba", false },

				  { "(a|b)\\z", "abc", false },

				};

				// Compiles every entry of reverse_tests to a reverse program, runs an
				// unanchored first-match DFA search and compares the result with the
				// expected value.  Mismatches are logged and counted.
				TEST(DFA, ReverseMatch) {
				  int nfail = 0;
				  const int ntests = arraysize(reverse_tests);
				  for (int idx = 0; idx < ntests; ++idx) {
				    const ReverseTest& t = reverse_tests[idx];
				    Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
				    CHECK(re);
				    Prog *prog = re->CompileToReverseProg(0);
				    CHECK(prog);

				    // `failed` only receives SearchDFA's failure flag; this test does
				    // not inspect it.
				    bool failed = false;
				    const bool matched = prog->SearchDFA(t.text, NULL, Prog::kUnanchored,
				                                         Prog::kFirstMatch, NULL, &failed,
				                                         NULL);
				    delete prog;
				    re->Decref();

				    if (matched != t.match) {
				      LOG(ERROR) << t.regexp << " on " << t.text << ": want " << t.match;
				      nfail++;
				    }
				  }
				  EXPECT_EQ(nfail, 0);
				}

				}  // namespace re2

									
										166

third_party/re2/re2/testing/dump.cc
									
										vendored
									
				@ -1,166 +0,0 @@

				// Copyright 2006 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Dump the regexp into a string showing structure.

				// Tested by parse_unittest.cc

				// This function traverses the regexp recursively,

				// meaning that on inputs like Regexp::Simplify of

				// a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},

				// it takes time and space exponential in the size of the

				// original regular expression.  It can also use stack space

				// linear in the size of the regular expression for inputs

				// like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.

				// IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.

				// As a result, Dump is provided only in the testing

				// library (see BUILD).

				#include <string>

				#include <vector>

				#include "util/test.h"

				#include "re2/stringpiece.h"

				#include "re2/regexp.h"

				// Cause a link error if this file is used outside of testing.

				DECLARE_string(test_tmpdir);

				namespace re2 {

				// Short names used in dumps, indexed by the numeric value of re->op().
				// DumpRegexpAppending falls back to "op<N>" for values outside this table.
				// NOTE(review): the order presumably mirrors the operator enum declared
				// in re2/regexp.h -- keep the two in sync.
				static const char* kOpcodeNames[] = {

				  "bad",

				  "no",

				  "emp",

				  "lit",

				  "str",

				  "cat",

				  "alt",

				  "star",

				  "plus",

				  "que",

				  "rep",

				  "cap",

				  "dot",

				  "byte",

				  "bol",

				  "eol",

				  "wb",   // kRegexpWordBoundary

				  "nwb",  // kRegexpNoWordBoundary

				  "bot",

				  "eot",

				  "cc",

				  "match",

				};

				// Create string representation of regexp with explicit structure.

				// Nothing pretty, just for testing.

				static void DumpRegexpAppending(Regexp* re, string* s) {

				  if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {

				    StringAppendF(s, "op%d", re->op());

				  } else {

				    switch (re->op()) {

				      default:

				        break;

				      case kRegexpStar:

				      case kRegexpPlus:

				      case kRegexpQuest:

				      case kRegexpRepeat:

				        if (re->parse_flags() & Regexp::NonGreedy)

				          s->append("n");

				        break;

				    }

				    s->append(kOpcodeNames[re->op()]);

				    if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {

				      Rune r = re->rune();

				      if ('a' <= r && r <= 'z')

				        s->append("fold");

				    }

				    if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {

				      for (int i = 0; i < re->nrunes(); i++) {

				        Rune r = re->runes()[i];

				        if ('a' <= r && r <= 'z') {

				          s->append("fold");

				          break;

				        }

				      }

				    }

				  }

				  s->append("{");

				  switch (re->op()) {

				    default:

				      break;

				    case kRegexpEndText:

				      if (!(re->parse_flags() & Regexp::WasDollar)) {

				        s->append("\\z");

				      }

				      break;

				    case kRegexpLiteral: {

				      Rune r = re->rune();

				      char buf[UTFmax+1];

				      buf[runetochar(buf, &r)] = 0;

				      s->append(buf);

				      break;

				    }

				    case kRegexpLiteralString:

				      for (int i = 0; i < re->nrunes(); i++) {

				        Rune r = re->runes()[i];

				        char buf[UTFmax+1];

				        buf[runetochar(buf, &r)] = 0;

				        s->append(buf);

				      }

				      break;

				    case kRegexpConcat:

				    case kRegexpAlternate:

				      for (int i = 0; i < re->nsub(); i++)

				        DumpRegexpAppending(re->sub()[i], s);

				      break;

				    case kRegexpStar:

				    case kRegexpPlus:

				    case kRegexpQuest:

				      DumpRegexpAppending(re->sub()[0], s);

				      break;

				    case kRegexpCapture:

				      if (re->cap() == 0)

				        LOG(DFATAL) << "kRegexpCapture cap() == 0";

				      if (re->name()) {

				        s->append(*re->name());

				        s->append(":");

				      }

				      DumpRegexpAppending(re->sub()[0], s);

				      break;

				    case kRegexpRepeat:

				      s->append(StringPrintf("%d,%d ", re->min(), re->max()));

				      DumpRegexpAppending(re->sub()[0], s);

				      break;

				    case kRegexpCharClass: {

				      string sep;

				      for (CharClass::iterator it = re->cc()->begin();

				           it != re->cc()->end(); ++it) {

				        RuneRange rr = *it;

				        s->append(sep);

				        if (rr.lo == rr.hi)

				          s->append(StringPrintf("%#x", rr.lo));

				        else

				          s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));

				        sep = " ";

				      }

				      break;

				    }

				  }

				  s->append("}");

				}

				// Returns the structural dump of this regexp.  When FLAGS_test_tmpdir is
				// empty the call is treated as coming from outside a unit test: an error
				// is logged and the empty string is returned.
				string Regexp::Dump() {
				  string dumped;
				  // Make sure being called from a unit test.
				  if (!FLAGS_test_tmpdir.empty())
				    DumpRegexpAppending(this, &dumped);
				  else
				    LOG(ERROR) << "Cannot use except for testing.";
				  return dumped;
				}

				}  // namespace re2

									
										42

third_party/re2/re2/testing/exhaustive1_test.cc
									
										vendored
									
				@ -1,42 +0,0 @@

				// Copyright 2008 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Exhaustive testing of regular expression matching.

				#include "util/test.h"

				#include "re2/testing/exhaustive_tester.h"

				// Flag defined elsewhere.  Repetition.Capturing below reads it to decide
				// whether to run its largest exhaustive test.
				DECLARE_string(regexp_engines);

				namespace re2 {

				// Test simple repetition operators

				// Runs every repetition operator over the atoms a, b, c and ".", once
				// with strings over "ab" and once with longer strings over "a" alone.
				TEST(Repetition, Simple) {
				  const vector<string> atoms = Explode("abc.");
				  vector<string> ops = Split(" ",
				    "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} "
				    "%s{1,2} %s{2} %s{2,} %s{3,4} %s{4,5} "
				    "%s* %s+ %s? %s*? %s+? %s??");

				  ExhaustiveTest(3, 2, atoms, ops,
				                 6, Explode("ab"), "(?:%s)", "");
				  ExhaustiveTest(3, 2, atoms, ops,
				                 40, Explode("a"), "(?:%s)", "");
				}

				// Test capturing parens -- (a) -- inside repetition operators

				// Runs every repetition operator over atoms that include a capturing
				// group.  A second, much larger run is skipped when the engine list
				// names PCRE.
				TEST(Repetition, Capturing) {
				  vector<string> ops = Split(" ",
				    "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} "
				    "%s{1,2} %s{2} %s{2,} %s{3,4} %s{4,5} "
				    "%s* %s+ %s? %s*? %s+? %s??");
				  ExhaustiveTest(3, 2, Split(" ", "a (a) b"), ops,
				                 7, Explode("ab"), "(?:%s)", "");

				  // This would be a great test, but it runs forever when PCRE is enabled.
				  // Look for "PCRE" inside the flag value.  The earlier
				  // strstr("PCRE", flag) call had haystack and needle swapped, so it
				  // searched for the whole flag value inside the literal "PCRE": the test
				  // was skipped for an empty flag and ran for a list such as "DFA,PCRE".
				  // NOTE(review): if an empty flag value means "all engines, PCRE
				  // included", the empty case may need to skip as well -- confirm against
				  // the tester.
				  if (FLAGS_regexp_engines.find("PCRE") == string::npos)
				    ExhaustiveTest(4, 3, Split(" ", "a (a)"), ops,
				                   100, Explode("a"), "(?:%s)", "");
				}

				}  // namespace re2

									
										70

third_party/re2/re2/testing/exhaustive2_test.cc
									
										vendored
									
				@ -1,70 +0,0 @@

				// Copyright 2008 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Exhaustive testing of regular expression matching.

				#include "util/test.h"

				#include "re2/re2.h"

				#include "re2/testing/exhaustive_tester.h"

				// NOTE(review): FLAGS_regexp_engines is not referenced anywhere in this
				// file; the declaration looks unnecessary -- confirm before removing.
				DECLARE_string(regexp_engines);

				namespace re2 {

				// Test empty string matches (aka "(?:)")

				// Atoms are the empty group "(?:)" and "a"; strings are drawn from "ab".
				TEST(EmptyString, Exhaustive) {
				  const vector<string> atoms = Split(" ", "(?:) a");
				  const vector<string> letters = Split("", "ab");
				  ExhaustiveTest(2, 2, atoms, RegexpGenerator::EgrepOps(),
				                 5, letters, "", "");
				}

				// Test escaped versions of regexp syntax.

				// Every punctuation character is used raw in the test strings and with a
				// leading backslash as a regexp atom.
				TEST(Punctuation, Literals) {
				  vector<string> alphabet = Explode("()*+?{}[]\\^$.");

				  // Build the escaped atoms in the same order as the alphabet.
				  vector<string> escaped;
				  for (vector<string>::const_iterator it = alphabet.begin();
				       it != alphabet.end(); ++it)
				    escaped.push_back("\\" + *it);

				  ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(),
				                 2, alphabet, "", "");
				}

				// Test ^ $ . \A \z in presence of line endings.

				// Have to wrap the empty-width ones in (?:) so that

				// they can be repeated -- PCRE rejects ^* but allows (?:^)*

				// Atoms cover the anchors (each wrapped in a non-capturing group), dot,
				// "a" and an escaped newline; strings are drawn from 'a', 'b' and '\n'.
				TEST(LineEnds, Exhaustive) {
				  const vector<string> atoms =
				      Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)");
				  const vector<string> letters = Explode("ab\n");
				  ExhaustiveTest(2, 2, atoms, RegexpGenerator::EgrepOps(),
				                 4, letters, "", "");
				}

				// Test what does and does not match \n.

				// This would be a good test, except that PCRE seems to have a bug:

				// in single-byte character set mode (the default),

				// [^a] matches \n, but in UTF-8 mode it does not.

				// So when we run the test, the tester complains that

				// we don't agree with PCRE, but it's PCRE that is at fault.

				// For what it's worth, Perl gets this right (matches

				// regardless of whether UTF-8 input is selected):

				//

				//     #!/usr/bin/perl

				//     use POSIX qw(locale_h);

				//     print "matches in latin1\n" if "\n" =~ /[^a]/;

				//     setlocale("en_US.utf8");

				//     print "matches in utf8\n" if "\n" =~ /[^a]/;

				//

				// The rule chosen for RE2 is that by default, like Perl,

				// dot does not match \n but negated character classes [^a] do.

				// (?s) will allow dot to match \n; there is no way in RE2

				// to stop [^a] from matching \n, though the underlying library

				// provides a mechanism, and RE2 could add new syntax if needed.

				//

				// TEST(Newlines, Exhaustive) {

				//   vector<string> empty_vector;

				//   ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"),

				//                  RegexpGenerator::EgrepOps(),

				//                  4, Explode("a\n"), "");

				// }

				}  // namespace re2

									
										94

third_party/re2/re2/testing/exhaustive3_test.cc
									
										vendored
									
				@ -1,94 +0,0 @@

				// Copyright 2008 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Exhaustive testing of regular expression matching.

				#include "util/test.h"

				#include "re2/testing/exhaustive_tester.h"

				namespace re2 {

				// Test simple character classes by themselves.

				// Character-class atoms combined with the egrep operators, matched
				// against strings over "ab" with no wrapper around the regexp.
				TEST(CharacterClasses, Exhaustive) {
				  const char* const kAtoms =
				    "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .";
				  vector<string> atoms = Split(" ", kAtoms);
				  const vector<string> letters = Explode("ab");
				  ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),
				                 5, letters, "", "");
				}

				// Test simple character classes inside a___b (for example, a[a]b).

				// Same atoms as CharacterClasses.Exhaustive, but every generated regexp
				// is wrapped as a%sb.
				TEST(CharacterClasses, ExhaustiveAB) {
				  const char* const kAtoms =
				    "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .";
				  vector<string> atoms = Split(" ", kAtoms);
				  const vector<string> letters = Explode("ab");
				  ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),
				                 5, letters, "a%sb", "");
				}

				// Returns the UTF-8 encoding of rune r as a string.  The result is built
				// from a NUL-terminated buffer.
				static string UTF8(Rune r) {
				  char encoded[UTFmax+1];
				  const int len = runetochar(encoded, &r);
				  encoded[len] = 0;
				  return string(encoded);
				}

				// Returns a vector of "interesting" UTF8 characters.
				// Unicode is now too big to just return all of them,
				// so this function returns a set likely to be good test cases.
				// The vector is filled on the first call and handed back unchanged on
				// later calls.
				static const vector<string>& InterestingUTF8() {
				  static bool init;
				  static vector<string> v;

				  if (!init) {
				    init = true;

				    // All the Latin1 equivalents are interesting.
				    for (int r = 1; r < 256; r++)
				      v.push_back(UTF8(r));

				    // After that, the codes near bit boundaries are
				    // interesting, because they span byte sequence lengths.
				    for (int r = 256; r < 256 + 8; r++)
				      v.push_back(UTF8(r));
				    for (int base = 512; base < Runemax; base <<= 1)
				      for (int r = base - 8; r < base + 8; r++)
				        v.push_back(UTF8(r));

				    // The codes near Runemax, including Runemax itself, are interesting.
				    for (int r = Runemax - 8; r <= Runemax; r++)
				      v.push_back(UTF8(r));
				  }
				  return v;
				}

				// Test interesting UTF-8 characters against character classes.

				// Matches each single atom (anchors, escapes, Perl and POSIX classes)
				// against the strings returned by InterestingUTF8(), with no operators.
				TEST(InterestingUTF8, SingleOps) {
				  const char* const kAtoms =
				    ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
				    "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
				    "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
				    "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]";
				  vector<string> atoms = Split(" ", kAtoms);
				  vector<string> ops;  // deliberately empty: atoms only
				  ExhaustiveTest(1, 0, atoms, ops,
				                 1, InterestingUTF8(), "", "");
				}

				// Test interesting UTF-8 characters against character classes,

				// but wrap everything inside AB.

				// Same atoms as InterestingUTF8.SingleOps, but each regexp is wrapped as
				// a%sb and each test string is wrapped as "a" + string + "b".
				TEST(InterestingUTF8, AB) {
				  const char* const kAtoms =
				    ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
				    "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
				    "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
				    "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]";
				  vector<string> atoms = Split(" ", kAtoms);
				  vector<string> ops;  // deliberately empty: atoms only

				  // Work on a private copy so the shared vector stays untouched.
				  vector<string> alpha = InterestingUTF8();
				  for (vector<string>::iterator it = alpha.begin();
				       it != alpha.end(); ++it)
				    *it = "a" + *it + "b";

				  ExhaustiveTest(1, 0, atoms, ops,
				                 1, alpha, "a%sb", "");
				}

				}  // namespace re2

									
										38

third_party/re2/re2/testing/exhaustive_test.cc
									
										vendored
									
				@ -1,38 +0,0 @@

				// Copyright 2008 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Exhaustive testing of regular expression matching.

				#include "util/test.h"

				#include "re2/testing/exhaustive_tester.h"

				namespace re2 {

				// Declares the regexp_engines string flag.  None of the tests in this
				// file read it.  NOTE(review): presumably it is defined and consumed by
				// the tester code -- confirm before removing this declaration.
				DECLARE_string(regexp_engines);

				// Egrep-style regexps over the atoms "a", "b", "c" and ".",
				// matched against strings drawn from "abc".
				TEST(EgrepLiterals, Lowercase) {
				  const int max_atoms = 3;
				  const int max_ops = 2;
				  const int max_str_len = 3;
				  EgrepTest(max_atoms, max_ops, "abc.", max_str_len, "abc", "");
				}

				// Egrep-style regexps mixing upper- and lower-case literals,
				// matched case-sensitively (no wrapper).
				TEST(EgrepLiterals, MixedCase) {
				  const int max_atoms = 3;
				  const int max_ops = 2;
				  const int max_str_len = 2;
				  EgrepTest(max_atoms, max_ops, "AaBb.", max_str_len, "AaBb", "");
				}

				// Mixed-case literals with every regexp wrapped in (?i:...), i.e.
				// matched case-insensitively.
				TEST(EgrepLiterals, FoldCase) {
				  const int max_atoms = 3;
				  const int max_ops = 2;
				  const int max_str_len = 2;
				  // '@', '_' and '~' are punctuation characters that sit around
				  // A-Z and a-z in the ASCII table; including them in the string
				  // alphabet looks for bugs in the DFA's bytemap range code.
				  EgrepTest(max_atoms, max_ops, "abAB.", max_str_len, "aBc@_~", "(?i:%s)");
				}

				// Simple expressions matched against strings whose alphabet contains
				// the multi-byte UTF-8 sequence E2 98 BA (U+263A) next to ASCII 'a'.
				TEST(EgrepLiterals, UTF8) {
				  const int max_atoms = 3;
				  const int max_ops = 2;
				  const int max_str_len = 4;
				  EgrepTest(max_atoms, max_ops, "ab.", max_str_len, "a\xE2\x98\xBA", "");
				}

				}  // namespace re2

									
										188

third_party/re2/re2/testing/exhaustive_tester.cc
									
										vendored
									
				@ -1,188 +0,0 @@

				// Copyright 2008 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				// Exhaustive testing of regular expression matching.

				// Each test picks an alphabet (e.g., "abc"), a maximum string length,

				// a maximum regular expression length, and a maximum number of letters

				// that can appear in the regular expression.  Given these parameters,

				// it tries every possible regular expression and string, verifying that

				// the NFA, DFA, and a trivial backtracking implementation agree about

				// the location of the match.

				#include <stdlib.h>

				#include <stdio.h>

				// LOGGING selects what ExhaustiveTester::HandleRegexp does with each
				// regexp: when non-zero it prints test cases and answers instead of
				// running the Tester, and ExhaustiveTest skips its summary line.
				// Defaults to 0 unless something defined it before this point.
				#ifndef LOGGING

				#define LOGGING 0

				#endif

				#include "util/test.h"

				#include "re2/testing/exhaustive_tester.h"

				#include "re2/testing/tester.h"

				// When set, HandleRegexp prints each regexp (prefixed with '\r') to
				// stdout and flushes before testing it.
				DEFINE_bool(show_regexps, false, "show regexps during testing");

				// HandleRegexp stops feeding inputs to a regexp once this many of them
				// have failed.
				DEFINE_int32(max_bad_regexp_inputs, 1,

				             "Stop testing a regular expression after finding this many "

				             "strings that break it.");

				// Compiled in debug mode, the usual tests run for over an hour.

				// Have to cut it down to make the unit test machines happy.

				// When this flag and RE2_DEBUG_MODE are both true, ExhaustiveTest lowers
				// maxatoms, maxops and maxstrlen by one each (values <= 1 are kept).
				DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode.");

				namespace re2 {

				// Returns sp as a double-quoted, NUL-terminated C string in which
				// backslash and double quote are preceded by a backslash and newline
				// is written as \n.  The result lives in a function-local static
				// buffer of 512 bytes, so each call overwrites the previous result;
				// input that would not fit triggers LOG(FATAL).
				static char* escape(const StringPiece& sp) {
				  static char buf[512];
				  char* const limit = buf + sizeof buf;
				  char* out = buf;

				  *out++ = '\"';
				  for (int i = 0; i < sp.size(); i++) {
				    // Keep headroom for a two-byte escape plus the closing quote and NUL.
				    if (out + 5 >= limit)
				      LOG(FATAL) << "ExhaustiveTester escape: too long";

				    const char c = sp[i];
				    switch (c) {
				      case '\\':
				      case '\"':
				        *out++ = '\\';
				        *out++ = c;
				        break;
				      case '\n':
				        *out++ = '\\';
				        *out++ = 'n';
				        break;
				      default:
				        *out++ = c;
				        break;
				    }
				  }
				  *out++ = '\"';
				  *out = '\0';
				  return buf;
				}

				static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {

				  if (!re.Match(input, 0, input.size(), anchor, m, n)) {

				    printf("-");

				    return;

				  }

				  for (int i = 0; i < n; i++) {

				    if (i > 0)

				      printf(" ");

				    if (m[i].begin() == NULL)

				      printf("-");

				    else

				      printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin()));

				  }

				}

				// Processes a single generated regexp.

				// Compiles it using Regexp interface and PCRE, and then

				// checks that NFA, DFA, and PCRE all return the same results.

				void ExhaustiveTester::HandleRegexp(const string& const_regexp) {

				  regexps_++;

				  string regexp = const_regexp;

				  if (!topwrapper_.empty())

				    regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());

				  if (FLAGS_show_regexps) {

				    printf("\r%s", regexp.c_str());

				    fflush(stdout);

				  }

				  if (LOGGING) {

				    // Write out test cases and answers for use in testing

				    // other implementations, such as Go's regexp package.

				    if (randomstrings_)

				      LOG(ERROR) << "Cannot log with random strings.";

				    if (regexps_ == 1) {  // first

				      printf("strings\n");

				      strgen_.Reset();

				      while (strgen_.HasNext())

				        printf("%s\n", escape(strgen_.Next()));

				      printf("regexps\n");

				    }

				    printf("%s\n", escape(regexp));

				    RE2 re(regexp);

				    RE2::Options longest;

				    longest.set_longest_match(true);

				    RE2 relongest(regexp, longest);

				    int ngroup = re.NumberOfCapturingGroups()+1;

				    StringPiece* group = new StringPiece[ngroup];

				    strgen_.Reset();

				    while (strgen_.HasNext()) {

				      StringPiece input = strgen_.Next();

				      PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);

				      printf(";");

				      PrintResult(re, input, RE2::UNANCHORED, group, ngroup);

				      printf(";");

				      PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);

				      printf(";");

				      PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);

				      printf("\n");

				    }

				    delete[] group;

				    return;

				  }

				  Tester tester(regexp);

				  if (tester.error())

				    return;

				  strgen_.Reset();

				  strgen_.GenerateNULL();

				  if (randomstrings_)

				    strgen_.Random(stringseed_, stringcount_);

				  int bad_inputs = 0;

				  while (strgen_.HasNext()) {

				    tests_++;

				    if (!tester.TestInput(strgen_.Next())) {

				      failures_++;

				      if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)

				        break;

				    }

				  }

				}

				// Runs an exhaustive test on the given parameters: builds an
				// ExhaustiveTester, lets it generate and check every regexp, prints a
				// one-line summary (unless LOGGING is on) and expects zero failures.
				void ExhaustiveTest(int maxatoms, int maxops,
				                    const vector<string>& alphabet,
				                    const vector<string>& ops,
				                    int maxstrlen, const vector<string>& stralphabet,
				                    const string& wrapper,
				                    const string& topwrapper) {
				  // Debug-mode runs are slow, so lower each limit by one when
				  // quick_debug_mode is on; limits of 1 or less are left alone.
				  const bool trim_limits = RE2_DEBUG_MODE && FLAGS_quick_debug_mode;
				  if (trim_limits) {
				    maxatoms -= (maxatoms > 1) ? 1 : 0;
				    maxops -= (maxops > 1) ? 1 : 0;
				    maxstrlen -= (maxstrlen > 1) ? 1 : 0;
				  }

				  ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
				                     maxstrlen, stralphabet, wrapper,
				                     topwrapper);
				  t.Generate();

				  if (!LOGGING) {
				    const int alphabet_size = static_cast<int>(stralphabet.size());
				    printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
				           t.regexps(), t.tests(), t.failures(), maxstrlen, alphabet_size);
				  }
				  EXPECT_EQ(0, t.failures());
				}

				// Runs ExhaustiveTest with the basic egrep operators, once for each
				// top-level wrapper: none, anchored at the start, anchored at the end,
				// and anchored at both ends.  alphabet and stralphabet are passed
				// through Split("", ...) to obtain the atom and string alphabets.
				void EgrepTest(int maxatoms, int maxops, const string& alphabet,
				               int maxstrlen, const string& stralphabet,
				               const string& wrapper) {
				  const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
				  const char* const* const tops_end = tops + arraysize(tops);

				  for (const char* const* top = tops; top != tops_end; ++top) {
				    ExhaustiveTest(maxatoms, maxops,
				                   Split("", alphabet),
				                   RegexpGenerator::EgrepOps(),
				                   maxstrlen,
				                   Split("", stralphabet),
				                   wrapper,
				                   *top);
				  }
				}

				}  // namespace re2

									
										95

third_party/re2/re2/testing/exhaustive_tester.h
									
										vendored
									
				@ -1,95 +0,0 @@

				// Copyright 2009 The RE2 Authors.  All Rights Reserved.

				// Use of this source code is governed by a BSD-style

				// license that can be found in the LICENSE file.

				#ifndef RE2_TESTING_EXHAUSTIVE_TESTER_H__

				#define RE2_TESTING_EXHAUSTIVE_TESTER_H__

				#include <string>

				#include <vector>

				#include "util/util.h"

				#include "re2/testing/regexp_generator.h"

				#include "re2/testing/string_generator.h"

				namespace re2 {

				// RE2_DEBUG_MODE is true for builds without NDEBUG and for NDEBUG
				// builds compiled with one of the sanitizer macros set to a non-zero
				// value; it is false for every other build.
				#if defined(NDEBUG)
				# if ADDRESS_SANITIZER || MEMORY_SANITIZER || THREAD_SANITIZER
				// Not a debug build, but still under sanitizers.
				const bool RE2_DEBUG_MODE = true;
				# else
				const bool RE2_DEBUG_MODE = false;
				# endif
				#else
				// We are in a debug build.
				const bool RE2_DEBUG_MODE = true;
				#endif

				// Exhaustive regular expression test: generate all regexps within parameters,
				// then generate all strings of a given length over a given alphabet,
				// then check that NFA, DFA, and PCRE agree about whether each regexp matches
				// each possible string, and if so, where the match is.
				//
				// Can also be used in a "random" mode that generates a given number
				// of random regexp and strings, allowing testing of larger expressions
				// and inputs.
				class ExhaustiveTester : public RegexpGenerator {
				 public:
				  // maxatoms, maxops, alphabet and ops are forwarded to RegexpGenerator;
				  // maxstrlen and stralphabet configure the StringGenerator that
				  // supplies the input strings.  wrapper and topwrapper are stored as
				  // given.  All counters start at zero and random-string mode is off.
				  ExhaustiveTester(int maxatoms,
				                   int maxops,
				                   const vector<string>& alphabet,
				                   const vector<string>& ops,
				                   int maxstrlen,
				                   const vector<string>& stralphabet,
				                   const string& wrapper,
				                   const string& topwrapper)
				    : RegexpGenerator(maxatoms, maxops, alphabet, ops),
				      strgen_(maxstrlen, stralphabet),
				      wrapper_(wrapper),
				      topwrapper_(topwrapper),
				      regexps_(0), tests_(0), failures_(0),
				      randomstrings_(false), stringseed_(0), stringcount_(0)  { }

				  // Read-only counters; const so they can be queried through a const
				  // reference as well.
				  int regexps() const  { return regexps_; }
				  int tests() const    { return tests_; }
				  int failures() const { return failures_; }

				  // Needed for RegexpGenerator interface.
				  void HandleRegexp(const string& regexp);

				  // Causes testing to generate random input strings.
				  // seed and count are recorded here and used by later HandleRegexp calls.
				  void RandomStrings(int32 seed, int32 count) {
				    randomstrings_ = true;
				    stringseed_ = seed;
				    stringcount_ = count;
				  }

				 private:
				  StringGenerator strgen_;
				  string wrapper_;      // Regexp wrapper - either empty or has one %s.
				  string topwrapper_;   // Regexp top-level wrapper.
				  int regexps_;   // Number of HandleRegexp calls
				  int tests_;     // Number of regexp tests.
				  int failures_;  // Number of tests failed.
				  bool randomstrings_;  // Whether to use random strings
				  int32 stringseed_;    // If so, the seed.
				  int stringcount_;     // If so, how many to generate.
				  DISALLOW_COPY_AND_ASSIGN(ExhaustiveTester);
				};

				// Runs an exhaustive test on the given parameters.

				// Constructs an ExhaustiveTester from the arguments, calls Generate()
				// on it and expects the resulting failure count to be zero.
				// When RE2_DEBUG_MODE and the quick_debug_mode flag are both true,
				// maxatoms, maxops and maxstrlen are each lowered by one first
				// (values of 1 or less are left unchanged).
				//   maxatoms, maxops, alphabet, ops: forwarded to RegexpGenerator.
				//   maxstrlen, stralphabet: forwarded to the StringGenerator.
				//   wrapper: regexp wrapper, either empty or containing one %s.
				//   topwrapper: if non-empty, a format applied to each whole regexp.
				void ExhaustiveTest(int maxatoms, int maxops,

				                    const vector<string>& alphabet,

				                    const vector<string>& ops,

				                    int maxstrlen, const vector<string>& stralphabet,

				                    const string& wrapper,

				                    const string& topwrapper);

				// Runs an exhaustive test using the given parameters and

				// the basic egrep operators.

				// Calls ExhaustiveTest once per top-level wrapper -- "", "^(?:%s)",
				// "(?:%s)$" and "^(?:%s)$" -- using RegexpGenerator::EgrepOps() as the
				// operator set.  alphabet and stralphabet are strings that are passed
				// through Split("", ...) to build the atom and string alphabets
				// (presumably one entry per character -- confirm against Split).
				void EgrepTest(int maxatoms, int maxops, const string& alphabet,

				               int maxstrlen, const string& stralphabet,

				               const string& wrapper);

				}  // namespace re2

				#endif  // RE2_TESTING_EXHAUSTIVE_TESTER_H__