0

Simple URI: A class representing simplified URI - part 2

This is a part of the implementation of class Uri. This CL contains the
implementation of the getters along with the algorithm that encodes
disallowed characters with the % sign.
Other CLs:
 - https://crrev.com/c/2103143
 - https://crrev.com/c/2123640

BUG=chromium:821497
TEST=on my workstation

Change-Id: Iabe72b7d9907743345a886fe3ffb2dc19b1b76b2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2118426
Commit-Queue: Piotr Pawliczek <pawliczek@chromium.org>
Reviewed-by: Sean Kau <skau@chromium.org>
Cr-Commit-Position: refs/heads/master@{#758049}
This commit is contained in:
Piotr Pawliczek
2020-04-09 22:36:09 +00:00
committed by Commit Bot
parent 4d75a70073
commit 42efb15a15
3 changed files with 550 additions and 1 deletions
chromeos/printing

416
chromeos/printing/uri.cc Normal file

@ -0,0 +1,416 @@
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromeos/printing/uri.h"

#include <algorithm>
#include <array>

#include "base/logging.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "uri_impl.h"
namespace chromeos {
namespace {
// Inclusive bounds of the printable ASCII range: 32 (0x20) is the space
// character and 126 (0x7E) is '~'.
constexpr unsigned char kFirstPrintableChar = 32;
constexpr unsigned char kLastPrintableChar = 126;
// Maps a 4-bit value (0-15) to the corresponding uppercase hexadecimal digit
// ('0'-'9' for 0-9, 'A'-'F' for 10-15).
char ToHexDigit(uint8_t val) {
  DCHECK(val < 16);
  // Offset added to |val|: digits start at '0', letters at 'A' for val == 10.
  const int base = (val < 10) ? '0' : ('A' - 10);
  return static_cast<char>(base + val);
}
// Helper class used to encode an input strings by %-escaping disallowed bytes.
class Encoder {
public:
// Constructor. The set of allowed characters = STD_CHARS + |additional|.
Encoder(const std::string& additional) { Allow(additional); }
// Extends the set of allowed characters by |chars|.
// All characters in |chars| must be printable ASCII characters.
void Allow(const std::string& chars) {
for (char c : chars) {
const unsigned char uc = static_cast<unsigned char>(c);
DCHECK_GE(uc, kFirstPrintableChar);
DCHECK_LE(uc, kLastPrintableChar);
allowed_[uc - kFirstPrintableChar] = true;
}
}
// Removes |chars| from the set of allowed characters.
// All characters in |chars| must be printable ASCII characters.
void Disallow(const std::string& chars) {
for (char c : chars) {
const unsigned char uc = static_cast<unsigned char>(c);
DCHECK_GE(uc, kFirstPrintableChar);
DCHECK_LE(uc, kLastPrintableChar);
allowed_[uc - kFirstPrintableChar] = false;
}
}
// Encodes the input string |str| and appends the output string to |out|.
// |out| cannot be nullptr. |str| may contain UTF-8 characters, but cannot
// include ASCII characters from range [0,kFirstPrintablechar).
void EncodeAndAppend(const std::string& str, std::string* out) const {
for (auto it = str.begin(); it < str.end(); ++it) {
const unsigned char uc = static_cast<unsigned char>(*it);
DCHECK_GE(uc, kFirstPrintableChar);
if (uc <= kLastPrintableChar && allowed_[uc - kFirstPrintableChar]) {
out->push_back(*it);
} else {
out->push_back('%');
out->push_back(ToHexDigit(uc >> 4));
out->push_back(ToHexDigit(uc & 0x0f));
}
}
}
// Encodes the input string |str| and returns it. |str| may contain UTF-8
// characters, but cannot include ASCII characters from range
// [0,kFirstPrintablechar).
std::string Encode(const std::string& str) const {
std::string out;
out.reserve(str.size() * 5 / 4);
EncodeAndAppend(str, &out);
return out;
}
private:
// The array of allowed characters. The first element corresponds to ASCII
// value 0x20 (space), the last one to 0x7E (~). Default value contains
// STD_CHARS.
// Clang formatting is deactivated for this piece of code.
// clang-format off
std::array<bool,95> allowed_ =
// ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
{ 0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0
//@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _
, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1
//` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~
, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1
};
// clang-format on
};
// Returns true if |str| contains at least one byte outside the ASCII range
// (i.e. outside 0x00-0x7F).
bool HasNonASCII(const std::string& str) {
  for (const char ch : str) {
    // Bytes above 0x7F are exactly those with the most significant bit set.
    if ((static_cast<unsigned char>(ch) & 0x80) != 0)
      return true;
  }
  return false;
}
} // namespace
// Default constructor. Creates a Uri backed by a default-constructed Pim
// object.
Uri::Uri() : pim_(std::make_unique<Pim>()) {}
// Constructs a Uri by parsing |uri|. Leading and trailing ASCII whitespaces
// ( \r\n\t\f\v) are not passed to the parser. When |uri| is empty or consists
// of whitespaces only, the parser is not run at all.
Uri::Uri(const std::string& uri) : pim_(std::make_unique<Pim>()) {
  // Number of whitespace characters at the beginning of |uri|.
  const size_t leading =
      uri.size() -
      base::TrimWhitespaceASCII(uri, base::TrimPositions::TRIM_LEADING).size();
  if (leading == uri.size())
    return;

  // Number of whitespace characters at the end of |uri|.
  const size_t trailing =
      uri.size() -
      base::TrimWhitespaceASCII(uri, base::TrimPositions::TRIM_TRAILING).size();

  // Runs the parser on the trimmed range.
  const auto first = uri.begin() + leading;
  const auto last = uri.end() - trailing;
  pim_->ParseUri(first, last);

  // The parser counted characters from |first|; shifts the counter by the
  // number of skipped leading whitespaces.
  pim_->parser_error().parsed_chars += leading;
}
// Returns the default port number registered for |scheme| in
// Pim::kDefaultPorts, or -1 when the map has no entry for |scheme|.
int Uri::GetDefaultPort(const std::string& scheme) {
  const auto entry = Pim::kDefaultPorts.find(scheme);
  return (entry != Pim::kDefaultPorts.end()) ? entry->second : -1;
}
// Copy constructor. Creates a new Pim object that is a copy of |uri|'s one, so
// both objects own separate Pim instances afterwards.
Uri::Uri(const Uri& uri) : pim_(std::make_unique<Pim>(*uri.pim_)) {}
// Move constructor. Takes over the Pim object of |uri|. |uri| receives a
// freshly created Pim in exchange, so its |pim_| is never left null.
Uri::Uri(Uri&& uri) : pim_(std::make_unique<Pim>()) {
pim_.swap(uri.pim_);
}
// Destructor. NOTE(review): presumably defined in this file because the
// definition of Pim (uri_impl.h) is visible only here - confirm against uri.h.
Uri::~Uri() = default;
// Copy assignment. Copies the content of |uri|'s Pim object into the Pim
// object owned by this instance (no reallocation of |pim_|).
Uri& Uri::operator=(const Uri& uri) {
*pim_ = *uri.pim_;
return *this;
}
// Move assignment. Exchanges the Pim objects of both instances, so |uri| ends
// up holding the previous state of this object.
Uri& Uri::operator=(Uri&& uri) {
pim_.swap(uri.pim_);
return *this;
}
// Returns a reference to the parser error structure stored in the Pim object.
// The reference points to internal state of this Uri.
const Uri::ParserError& Uri::GetLastParsingError() const {
return pim_->parser_error();
}
// Returns the normalized string representation of the URI. All components are
// %-escaped with the same sets of allowed characters as in the corresponding
// Get*Encoded() methods.
// If the Port is unspecified (==-1) it is not included in the output URI.
// If the Port is specified, it is included in the output URI <=> at least one
// of the following conditions is true:
//  - |always_print_port| == true
//  - Scheme has no default port number
//  - Port is different than Scheme's default port number.
std::string Uri::GetNormalized(bool always_print_port) const {
  // Calculates a string representation of the Port number.
  std::string port;
  if (pim_->port() >= 0) {
    const auto default_port = Pim::kDefaultPorts.find(pim_->scheme());
    if (always_print_port || default_port == Pim::kDefaultPorts.end() ||
        default_port->second != pim_->port()) {
      port = base::NumberToString(pim_->port());
    }
  }

  // Output string. Adds Scheme.
  std::string out = pim_->scheme();
  out.push_back(':');

  // Adds authority (Userinfo + Host + Port) if non-empty.
  Encoder enc("+&=:");
  if (!(pim_->userinfo().empty() && pim_->host().empty() && port.empty())) {
    out.append("//");
    // Userinfo.
    if (!pim_->userinfo().empty()) {
      enc.EncodeAndAppend(pim_->userinfo(), &out);
      out.push_back('@');
    }
    // Host. ':' separates Host from Port, so a ':' inside Host must be
    // %-escaped (the same set of allowed characters as in GetHostEncoded()).
    enc.Disallow(":");
    enc.EncodeAndAppend(pim_->host(), &out);
    enc.Allow(":");
    // Port.
    if (!port.empty()) {
      out.push_back(':');
      out.append(port);
    }
  }

  // Adds Path. Allowed characters: STD_CHARS + "+&=:@".
  enc.Allow("@");
  for (const auto& segment : pim_->path()) {
    out.push_back('/');
    enc.EncodeAndAppend(segment, &out);
  }

  // Adds Query. Allowed characters: STD_CHARS + ":@/?". The first parameter
  // is preceded by '?', each following one by '&'. "=value" is omitted for
  // parameters with empty value.
  enc.Disallow("+&=");
  enc.Allow("/?");
  char separator = '?';
  for (const auto& param : pim_->query()) {
    out.push_back(separator);
    separator = '&';
    enc.EncodeAndAppend(param.first, &out);
    if (!param.second.empty()) {
      out.push_back('=');
      enc.EncodeAndAppend(param.second, &out);
    }
  }

  // Adds Fragment. Allowed characters: STD_CHARS + "+&=:@/?".
  enc.Allow("+&=");
  if (!pim_->fragment().empty()) {
    out.push_back('#');
    enc.EncodeAndAppend(pim_->fragment(), &out);
  }
  return out;
}
bool Uri::IsASCII() const {
if (HasNonASCII(pim_->userinfo()) || HasNonASCII(pim_->host()) ||
HasNonASCII(pim_->fragment())) {
return false;
}
for (auto& s : pim_->path()) {
if (HasNonASCII(s))
return false;
}
for (auto& p : pim_->query()) {
if (HasNonASCII(p.first) || HasNonASCII(p.second))
return false;
}
return true;
}
// Returns a copy of the stored Scheme.
std::string Uri::GetScheme() const {
return pim_->scheme();
}
// Clears the parser error and parses |val| as a new Scheme. Returns the result
// of Pim::ParseScheme(): false means that parsing failed and the parser error
// was set.
bool Uri::SetScheme(const std::string& val) {
pim_->ResetParserError();
return pim_->ParseScheme(val.begin(), val.end());
}
// Returns the stored Port number; -1 means "unspecified".
int Uri::GetPort() const {
return pim_->port();
}
// Clears the parser error and saves |val| as a new Port. Returns the result of
// Pim::SavePort(), which fails <=> |val| is smaller than -1 or larger than
// 65535.
bool Uri::SetPort(int val) {
pim_->ResetParserError();
return pim_->SavePort(val);
}
// Returns a copy of the stored Userinfo (the value before %-escaping).
std::string Uri::GetUserinfo() const {
return pim_->userinfo();
}
// Returns a copy of the stored Host (the value before %-escaping).
std::string Uri::GetHost() const {
return pim_->host();
}
// Returns a copy of the stored Path as a list of segments, without separators
// ('/') and before %-escaping.
std::vector<std::string> Uri::GetPath() const {
return pim_->path();
}
// Returns a copy of the stored Query as a list of pairs (name, value), before
// %-escaping. A value may be empty.
std::vector<std::pair<std::string, std::string>> Uri::GetQuery() const {
return pim_->query();
}
// Returns a copy of the stored Fragment (the value before %-escaping).
std::string Uri::GetFragment() const {
return pim_->fragment();
}
// Returns the Userinfo with %-escaped characters. Allowed (not escaped)
// characters: STD_CHARS + "+&=:".
std::string Uri::GetUserinfoEncoded() const {
  return Encoder("+&=:").Encode(pim_->userinfo());
}
// Returns the Host with %-escaped characters. Allowed (not escaped)
// characters: STD_CHARS + "+&=".
std::string Uri::GetHostEncoded() const {
  return Encoder("+&=").Encode(pim_->host());
}
std::vector<std::string> Uri::GetPathEncoded() const {
Encoder enc("+&=:@");
std::vector<std::string> out(pim_->path().size());
for (size_t i = 0; i < out.size(); ++i)
out[i] = enc.Encode(pim_->path()[i]);
return out;
}
std::string Uri::GetPathEncodedAsString() const {
Encoder enc("+&=:@");
std::string out;
for (auto& segment : pim_->path())
out += "/" + enc.Encode(segment);
return out;
}
std::vector<std::pair<std::string, std::string>> Uri::GetQueryEncoded() const {
Encoder enc(":@/?");
std::vector<std::pair<std::string, std::string>> out(pim_->query().size());
for (size_t i = 0; i < out.size(); ++i) {
out[i].first = enc.Encode(pim_->query()[i].first);
out[i].second = enc.Encode(pim_->query()[i].second);
}
return out;
}
std::string Uri::GetQueryEncodedAsString() const {
Encoder enc(":@/?");
std::string out;
for (auto& param_value : pim_->query()) {
if (!out.empty())
out.push_back('&');
out += enc.Encode(param_value.first);
if (!param_value.second.empty())
out += "=" + enc.Encode(param_value.second);
}
return out;
}
// Returns the Fragment with %-escaped characters. Allowed (not escaped)
// characters: STD_CHARS + "+&=:@/?".
std::string Uri::GetFragmentEncoded() const {
  return Encoder("+&=:@/?").Encode(pim_->fragment());
}
// Clears the parser error and saves |val| as a new Userinfo. |val| is given in
// non-encoded form (template parameter |encoded| == false): % signs are
// treated as regular characters. Returns the result of Pim::SaveUserinfo().
bool Uri::SetUserinfo(const std::string& val) {
pim_->ResetParserError();
return pim_->SaveUserinfo<false>(val);
}
// Clears the parser error and saves |val| as a new Host. |val| is given in
// non-encoded form (template parameter |encoded| == false): % signs are
// treated as regular characters. Returns the result of Pim::SaveHost().
bool Uri::SetHost(const std::string& val) {
pim_->ResetParserError();
return pim_->SaveHost<false>(val);
}
// Clears the parser error and saves |val| as a new Path. |val| is a list of
// segments in non-encoded form (template parameter |encoded| == false): %
// signs are treated as regular characters. Returns the result of
// Pim::SavePath().
bool Uri::SetPath(const std::vector<std::string>& val) {
pim_->ResetParserError();
return pim_->SavePath<false>(val);
}
// Clears the parser error and saves |val| as a new Query. |val| is a list of
// pairs (name, value) in non-encoded form (template parameter |encoded| ==
// false): % signs are treated as regular characters. Returns the result of
// Pim::SaveQuery().
bool Uri::SetQuery(
const std::vector<std::pair<std::string, std::string>>& val) {
pim_->ResetParserError();
return pim_->SaveQuery<false>(val);
}
// Clears the parser error and saves |val| as a new Fragment. |val| is given in
// non-encoded form (template parameter |encoded| == false): % signs are
// treated as regular characters. Returns the result of Pim::SaveFragment().
bool Uri::SetFragment(const std::string& val) {
pim_->ResetParserError();
return pim_->SaveFragment<false>(val);
}
// Clears the parser error and saves |val| as a new Userinfo. |val| is given in
// encoded form (template parameter |encoded| == true): every % sign is treated
// as the beginning of a %-escaped character. Returns the result of
// Pim::SaveUserinfo().
bool Uri::SetUserinfoEncoded(const std::string& val) {
pim_->ResetParserError();
return pim_->SaveUserinfo<true>(val);
}
// Clears the parser error and saves |val| as a new Host. |val| is given in
// encoded form (template parameter |encoded| == true): every % sign is treated
// as the beginning of a %-escaped character. Returns the result of
// Pim::SaveHost().
bool Uri::SetHostEncoded(const std::string& val) {
pim_->ResetParserError();
return pim_->SaveHost<true>(val);
}
// Clears the parser error and saves |val| as a new Path. |val| is a list of
// segments in encoded form (template parameter |encoded| == true): every %
// sign is treated as the beginning of a %-escaped character. Returns the
// result of Pim::SavePath().
bool Uri::SetPathEncoded(const std::vector<std::string>& val) {
pim_->ResetParserError();
return pim_->SavePath<true>(val);
}
// Clears the parser error and parses the whole string |val| as a new Path.
// According to the description of Pim::ParsePath(), |val| must be empty or
// start with '/'. Returns the result of Pim::ParsePath().
bool Uri::SetPathEncoded(const std::string& val) {
pim_->ResetParserError();
return pim_->ParsePath(val.begin(), val.end());
}
// Clears the parser error and saves |val| as a new Query. |val| is a list of
// pairs (name, value) in encoded form (template parameter |encoded| == true):
// every % sign is treated as the beginning of a %-escaped character. Returns
// the result of Pim::SaveQuery().
bool Uri::SetQueryEncoded(
const std::vector<std::pair<std::string, std::string>>& val) {
pim_->ResetParserError();
return pim_->SaveQuery<true>(val);
}
// Clears the parser error and parses the whole string |val| as a new Query.
// Returns the result of Pim::ParseQuery(): false means that parsing failed and
// the parser error was set.
bool Uri::SetQueryEncoded(const std::string& val) {
pim_->ResetParserError();
return pim_->ParseQuery(val.begin(), val.end());
}
// Clears the parser error and saves |val| as a new Fragment. |val| is given in
// encoded form (template parameter |encoded| == true): every % sign is treated
// as the beginning of a %-escaped character. Returns the result of
// Pim::SaveFragment().
bool Uri::SetFragmentEncoded(const std::string& val) {
pim_->ResetParserError();
return pim_->SaveFragment<true>(val);
}
// Lexicographical comparison of two URIs. Components are compared in the
// following order: Scheme, Userinfo, Host, Port, Path, Query, Fragment. The
// first component that differs decides about the result.
bool Uri::operator<(const Uri& uri) const {
  const Pim& lhs = *pim_;
  const Pim& rhs = *uri.pim_;
  if (lhs.scheme() != rhs.scheme())
    return lhs.scheme() < rhs.scheme();
  if (lhs.userinfo() != rhs.userinfo())
    return lhs.userinfo() < rhs.userinfo();
  if (lhs.host() != rhs.host())
    return lhs.host() < rhs.host();
  if (lhs.port() != rhs.port())
    return lhs.port() < rhs.port();
  if (lhs.path() != rhs.path())
    return lhs.path() < rhs.path();
  if (lhs.query() != rhs.query())
    return lhs.query() < rhs.query();
  return lhs.fragment() < rhs.fragment();
}
// Two URIs are equal <=> all their components (Scheme, Userinfo, Host, Port,
// Path, Query and Fragment) are equal.
bool Uri::operator==(const Uri& uri) const {
  const Pim& lhs = *pim_;
  const Pim& rhs = *uri.pim_;
  return lhs.scheme() == rhs.scheme() && lhs.userinfo() == rhs.userinfo() &&
         lhs.host() == rhs.host() && lhs.port() == rhs.port() &&
         lhs.path() == rhs.path() && lhs.query() == rhs.query() &&
         lhs.fragment() == rhs.fragment();
}
} // namespace chromeos

@ -257,7 +257,7 @@ class Uri {
Uri();
// Constructor, it tries to parse |uri|.
// Leading and trailing white spaces (space, \t, \n, \r, \f) are ignored.
// Leading and trailing whitespaces (space, \t, \n, \r, \f, \v) are ignored.
explicit Uri(const std::string& uri);
Uri(const Uri&);

@ -0,0 +1,133 @@
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROMEOS_PRINTING_URI_IMPL_H_
#define CHROMEOS_PRINTING_URI_IMPL_H_
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "chromeos/printing/uri.h"
// This file contains the declaration of the class used in the implementation
// of class Uri declared in uri.h. This file is not supposed to be included
// anywhere outside of the implementation of class Uri.
namespace chromeos {
// Iterator type used to pass input ranges |begin|-|end| to the parsing methods.
using Iter = std::string::const_iterator;
// Private implementation of class Uri. It stores values of all URI components
// together with the status of the last parsing operation, and declares the
// methods used to parse and save these components.
class Uri::Pim {
public:
// The map with pairs scheme -> default_port.
static const std::map<std::string, int> kDefaultPorts;
// Resets the internal field |parser_error_| to the state "no errors".
void ResetParserError() {
parser_error_.parsed_chars = 0;
parser_error_.parsed_strings = 0;
parser_error_.status = ParserStatus::kNoErrors;
}
// These methods parse and normalize the corresponding component(s) from the
// input string |begin|-|end|. Each component is saved only if successfully
// parsed and verified. In case of an error, the field |parser_error_| is set
// and false is returned. All methods assume that |begin| <= |end|.
// Additional notes for particular components:
// * Scheme: if the current Port is unspecified and the new Scheme has a
// default port number, the Port is set to this default value
// * Authority: this is Userinfo + Host + Port, see description in uri.h for
// the grammar
// * Path: the input string must be empty or start with '/'
bool ParseScheme(const Iter& begin, const Iter& end);
bool ParseAuthority(const Iter& begin, const Iter& end);
bool ParsePath(const Iter& begin, const Iter& end);
bool ParseQuery(const Iter& begin, const Iter& end);
bool ParseFragment(const Iter& begin, const Iter& end);
// This method parses the whole URI. It internally calls the methods
// Parse*(...) declared above. In case of an error, the method sets the field
// |parser_error_| and returns false. Parsing stops on the first error,
// components that have been successfully parsed are saved.
bool ParseUri(const Iter& begin, const Iter end);
// This method fails (and returns false) <=> |port| is smaller than -1 or
// larger than 65535. If |port| == -1 and the current Scheme has a default
// port, the default port is set as a new Port number.
bool SavePort(int port);
// These methods save values of corresponding components. The template
// parameter |encoded| triggers resolution of %-escaped characters. If set to
// true, every % sign in the input value is treated as the beginning of a
// %-escaped character; if set to false, % signs are treated as regular ASCII
// characters. All input values are validated and normalized, but without
// %-escaping fragile characters (components are stored in "native" form). In
// case of a failure, false is returned, the value of the target component is
// not modified, and the field |parser_error_| is set accordingly.
template <bool encoded>
bool SaveUserinfo(const std::string& val);
template <bool encoded>
bool SaveHost(const std::string& val);
template <bool encoded>
bool SavePath(const std::vector<std::string>& val);
template <bool encoded>
bool SaveQuery(const std::vector<std::pair<std::string, std::string>>& val);
template <bool encoded>
bool SaveFragment(const std::string& val);
// Getters for all fields.
const std::string& scheme() const { return scheme_; }
const std::string& userinfo() const { return userinfo_; }
const std::string& host() const { return host_; }
int port() const { return port_; }
const std::vector<std::string>& path() const { return path_; }
const std::vector<std::pair<std::string, std::string>>& query() const {
return query_;
}
const std::string& fragment() const { return fragment_; }
// Mutable access to the |parser_error_|.
ParserError& parser_error() { return parser_error_; }
private:
// Reads the string |begin|-|end| and performs the following operations:
// 1. if |plus_to_space| is true, all '+' signs are converted to ' ' (space)
// 2. if |encoded| is true, all % signs are treated as initiators of
// %-escaped characters and decoded to the corresponding ASCII characters
// 3. if |case_insensitive| is true, all capital ASCII letters are converted
// to lowercase
// 4. all UTF-8 characters are validated
// 5. all ASCII characters are validated (see the section Encoding in uri.h)
// The output is saved to |out|. In case of an error, the method sets the
// field |parser_error_| and returns false.
// The following initial requirements must be met:
// * |begin| <= |end|
// * |out| must point to an empty string
// When the method returns false, |out| may contain an invalid value.
template <bool encoded, bool case_insensitive = false>
bool ParseString(const Iter& begin,
const Iter& end,
std::string* out,
bool plus_to_space = false);
// Components values. They are valid and normalized, but before %-escaping.
std::string scheme_;
std::string userinfo_;
std::string host_;
int port_ = -1; // -1 means "unspecified"
// A list of path's segments, without separators ('/').
std::vector<std::string> path_;
// A list of parameters name=value; value may be empty.
std::vector<std::pair<std::string, std::string>> query_;
std::string fragment_;
// The last parser status.
ParserError parser_error_;
};
} // namespace chromeos
#endif /* CHROMEOS_PRINTING_URI_IMPL_H_ */