#include "image_io/base/data_scanner.h"
|
|
namespace photos_editing_formats {
|
namespace image_io {
|
|
namespace {
|
|
/// The set of characters the whitespace scanners accept: space, tab, newline
/// and carriage return. Stored as a null-terminated accept set for memspn.
const char kWhitespaceChars[] = " \t\n\r";
|
|
/// This function is like strspn but does not assume a null-terminated string.
/// @param s The bytes to scan; they need not be null-terminated.
/// @param slen The number of bytes available at s.
/// @param accept A null-terminated set of characters to accept. The
///     terminating null is never treated as a member of the set.
/// @return The length of the leading run of bytes in s that are all in accept.
size_t memspn(const char* s, size_t slen, const char* accept) {
  size_t count = 0;
  // Test the bound before touching s[count] so that s[slen] is never read,
  // even when every byte matches or when slen is zero.
  while (count < slen) {
    const char c = s[count];
    const char* spanp = accept;
    while (*spanp != '\0' && *spanp != c) {
      ++spanp;
    }
    if (*spanp == '\0') {
      // c is not in the accept set; the span ends here.
      break;
    }
    ++count;
  }
  return count;
}
|
|
/// Tests whether a character lies within a closed interval.
/// @param value The character to test.
/// @param lo The lowest character accepted.
/// @param hi The highest character accepted.
/// @return Whether value is in the range [lo:hi].
bool InRange(char value, char lo, char hi) {
  if (value < lo) {
    return false;
  }
  return !(value > hi);
}
|
|
/// @return Whether the value is the first character of a kName type scanner.
|
bool IsFirstNameChar(char value) {
|
return InRange(value, 'A', 'Z') || InRange(value, 'a', 'z') || value == '_' ||
|
value == ':';
|
}
|
|
/// Scans the characters in the s string, where the characters can be any legal
|
/// character in the name.
|
/// @return The number of name characters scanned.
|
size_t ScanOptionalNameChars(const char* s, size_t slen) {
|
const char* kOptionalChars =
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-_:";
|
return memspn(s, slen, kOptionalChars);
|
}
|
|
/// Scans the whitespace characters in the s string.
|
/// @return The number of whitepace characters scanned.
|
size_t ScanWhitespaceChars(const char* s, size_t slen) {
|
return memspn(s, slen, kWhitespaceChars);
|
}
|
|
} // namespace
|
|
/// @return A string holding the characters in kWhitespaceChars.
std::string DataScanner::GetWhitespaceChars() {
  return std::string(kWhitespaceChars);
}
|
|
/// Creates a scanner of type kLiteral.
/// @param literal The literal text handed to the scanner's constructor.
/// @return The newly constructed scanner.
DataScanner DataScanner::CreateLiteralScanner(const std::string& literal) {
  return DataScanner(kLiteral, literal);
}
|
|
/// Creates a scanner of type kName.
/// @return The newly constructed scanner.
DataScanner DataScanner::CreateNameScanner() {
  return DataScanner(kName);
}
|
|
/// Creates a scanner of type kQuotedString.
/// @return The newly constructed scanner.
DataScanner DataScanner::CreateQuotedStringScanner() {
  return DataScanner(kQuotedString);
}
|
|
/// Creates a scanner of type kSentinel.
/// @param sentinels The sentinel characters handed to the scanner's
///     constructor.
/// @return The newly constructed scanner.
DataScanner DataScanner::CreateSentinelScanner(const std::string& sentinels) {
  return DataScanner(kSentinel, sentinels);
}
|
|
/// Creates a scanner of type kThroughLiteral.
/// @param literal The literal text handed to the scanner's constructor.
/// @return The newly constructed scanner.
DataScanner DataScanner::CreateThroughLiteralScanner(
    const std::string& literal) {
  return DataScanner(kThroughLiteral, literal);
}
|
|
/// Creates a scanner of type kWhitespace.
/// @return The newly constructed scanner.
DataScanner DataScanner::CreateWhitespaceScanner() {
  return DataScanner(kWhitespace);
}
|
|
/// Creates a scanner of type kOptionalWhitespace.
/// @return The newly constructed scanner.
DataScanner DataScanner::CreateOptionalWhitespaceScanner() {
  return DataScanner(kOptionalWhitespace);
}
|
|
/// Grows the token range by moving its end forward; the begin is unchanged.
/// @param delta_length The amount to add to the end of the token range.
/// @return The length of the token range after it has been extended.
size_t DataScanner::ExtendTokenLength(size_t delta_length) {
  const auto range_begin = token_range_.GetBegin();
  const auto extended_end = token_range_.GetEnd() + delta_length;
  token_range_ = DataRange(range_begin, extended_end);
  return token_range_.GetLength();
}
|
|
void DataScanner::SetInternalError(const DataContext& context,
|
const std::string& error_description,
|
DataMatchResult* result) {
|
result->SetType(DataMatchResult::kError);
|
result->SetMessage(
|
Message::kInternalError,
|
context.GetErrorText({}, {GetDescription()}, error_description, ""));
|
}
|
|
void DataScanner::SetSyntaxError(const DataContext& context,
|
const std::string& error_description,
|
DataMatchResult* result) {
|
result->SetType(DataMatchResult::kError);
|
result->SetMessage(Message::kSyntaxError,
|
context.GetErrorText(error_description, GetDescription()));
|
}
|
|
/// Matches the next bytes against the not-yet-matched tail of the literal.
/// The current token length is used as the number of literal bytes already
/// matched, so a literal split across several calls can be completed.
/// @param cbytes The bytes to scan.
/// @param bytes_available The number of bytes available at cbytes.
/// @param context The context used to build error messages.
/// @return A kFull result once the whole literal has been matched, a
///     kPartialOutOfData result if the available bytes matched but ran out
///     before the literal was complete, or an error result if the bytes do
///     not match or the literal had already been fully scanned.
DataMatchResult DataScanner::ScanLiteral(const char* cbytes,
                                         size_t bytes_available,
                                         const DataContext& context) {
  DataMatchResult result;
  size_t token_length = token_range_.GetLength();
  const size_t literal_length = literal_or_sentinels_.length();
  if (token_length >= literal_length) {
    SetInternalError(context, "Literal already scanned", &result);
    return result;
  }
  const size_t bytes_still_needed = literal_length - token_length;
  const size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available);
  // Compare exactly bytes_to_compare bytes. A strncmp here would stop at the
  // first NUL byte, so a literal containing '\0' would only be compared up to
  // that byte and could be reported as matching when its tail differs.
  const bool matches =
      literal_or_sentinels_.compare(token_length, bytes_to_compare, cbytes,
                                    bytes_to_compare) == 0;
  if (matches) {
    token_length = ExtendTokenLength(bytes_to_compare);
    result.SetBytesConsumed(bytes_to_compare);
    result.SetType(token_length == literal_length
                       ? DataMatchResult::kFull
                       : DataMatchResult::kPartialOutOfData);
  } else {
    SetSyntaxError(context, "Expected literal", &result);
  }
  return result;
}
|
|
/// Scans a name: one first-name character (see IsFirstNameChar) followed by
/// any number of optional name characters (see ScanOptionalNameChars). A
/// non-zero token length on entry means the first character was consumed by
/// an earlier call, so only optional characters are scanned.
/// @param cbytes The bytes to scan.
/// @param bytes_available The number of bytes available at cbytes.
/// @param context The context used to build error messages.
/// @return A kFull result when a byte that is not a name character follows
///     the name (or when a continued scan consumes nothing), a
///     kPartialOutOfData result when the name runs to the end of the
///     available bytes, or an error result if the first byte cannot start a
///     name.
DataMatchResult DataScanner::ScanName(const char* cbytes,
                                      size_t bytes_available,
                                      const DataContext& context) {
  DataMatchResult result;
  size_t token_length = token_range_.GetLength();
  if (token_length == 0) {
    if (bytes_available == 0) {
      // Whether callers can pass an empty buffer is not visible here. Guard
      // anyway: without this, *cbytes would be read past the buffer and
      // bytes_available would wrap around when decremented below.
      result.SetType(DataMatchResult::kPartialOutOfData);
      return result;
    }
    if (!IsFirstNameChar(*cbytes)) {
      SetSyntaxError(context, "Expected first character of a name", &result);
      return result;
    }
    ExtendTokenLength(1);
    result.SetBytesConsumed(1);
    --bytes_available;
    ++cbytes;
  }
  const size_t optional_bytes_consumed =
      ScanOptionalNameChars(cbytes, bytes_available);
  token_length = ExtendTokenLength(optional_bytes_consumed);
  result.IncrementBytesConsumed(optional_bytes_consumed);
  // The name is complete when a continued scan consumed nothing at all, or
  // when a non-name byte was seen before the available bytes ran out.
  const bool continued_scan_consumed_nothing =
      result.GetBytesConsumed() == 0 && token_length > 0;
  const bool stopped_before_end = optional_bytes_consumed < bytes_available;
  if (continued_scan_consumed_nothing || stopped_before_end) {
    result.SetType(DataMatchResult::kFull);
  } else {
    result.SetType(DataMatchResult::kPartialOutOfData);
  }
  return result;
}
|
|
/// Scans a string delimited by a matching pair of single or double quotes.
/// The data_ member holds the scan state: kStart (0) before the opening
/// quote has been seen, the opening quote character while inside the string,
/// and kDone ('.') once the closing quote has been consumed.
/// @param cbytes The bytes to scan.
/// @param bytes_available The number of bytes available at cbytes.
/// @param context The context used to build error messages.
/// @return A kFull result once the closing quote has been consumed, a
///     kPartialOutOfData result if the available bytes ran out first, or an
///     error result if the first byte is not a quote or the state is
///     inconsistent (which includes scanning again after completion).
DataMatchResult DataScanner::ScanQuotedString(const char* cbytes,
                                              size_t bytes_available,
                                              const DataContext& context) {
  const size_t kStart = 0;
  const size_t kDone = '.';
  const size_t kSquote = '\'';
  const size_t kDquote = '"';
  DataMatchResult result;
  const size_t token_length = token_range_.GetLength();
  if ((data_ == kStart && token_length != 0) ||
      (data_ != kStart && data_ != kSquote && data_ != kDquote)) {
    SetInternalError(context, "Inconsistent state", &result);
    return result;
  }
  if (data_ == kStart) {
    if (bytes_available == 0) {
      // Whether callers can pass an empty buffer is not visible here. Guard
      // anyway: without this, *cbytes would be read past the buffer and
      // bytes_available would wrap around when decremented below.
      result.SetType(DataMatchResult::kPartialOutOfData);
      return result;
    }
    if (*cbytes != kSquote && *cbytes != kDquote) {
      SetSyntaxError(context, "Expected start of a quoted string", &result);
      return result;
    }
    // Remember which quote opened the string; the same one must close it.
    data_ = *cbytes++;
    bytes_available--;
    result.SetBytesConsumed(1);
    ExtendTokenLength(1);
  }
  const char* ebytes = reinterpret_cast<const char*>(
      memchr(cbytes, static_cast<int>(data_), bytes_available));
  const size_t bytes_scanned = ebytes ? ebytes - cbytes : bytes_available;
  result.IncrementBytesConsumed(bytes_scanned);
  ExtendTokenLength(bytes_scanned);
  if (bytes_scanned == bytes_available) {
    // No closing quote in the available bytes.
    result.SetType(DataMatchResult::kPartialOutOfData);
  } else {
    // Consume the closing quote as part of the token.
    result.SetType(DataMatchResult::kFull);
    result.IncrementBytesConsumed(1);
    ExtendTokenLength(1);
    data_ = kDone;
  }
  return result;
}
|
|
/// Consumes one byte if it matches one of the sentinel characters. The
/// sentinel '~' matches any byte accepted by IsFirstNameChar, in addition to
/// a literal '~'. The matching entry of the sentinel list (not the scanned
/// byte) is stored in data_, and a non-zero data_ on entry is treated as
/// "already scanned".
/// @param cbytes The bytes to scan.
/// @param bytes_available The number of bytes available at cbytes.
/// @param context The context used to build error messages.
/// @return A kFull result with one byte consumed on a match, a
///     kPartialOutOfData result if there are no bytes to examine, or an
///     error result if the byte matches no sentinel or a sentinel was
///     already scanned.
DataMatchResult DataScanner::ScanSentinel(const char* cbytes,
                                          size_t bytes_available,
                                          const DataContext& context) {
  DataMatchResult result;
  if (data_ != 0) {
    SetInternalError(context, "Sentinel already scanned", &result);
    return result;
  }
  if (bytes_available == 0) {
    // Whether callers can pass an empty buffer is not visible here. Guard
    // anyway so that *cbytes is never read past the end of the buffer.
    result.SetType(DataMatchResult::kPartialOutOfData);
    return result;
  }
  const char cbyte = *cbytes;
  for (size_t index = 0; index < literal_or_sentinels_.size(); ++index) {
    const char sentinel = literal_or_sentinels_[index];
    if ((sentinel == '~' && IsFirstNameChar(cbyte)) || cbyte == sentinel) {
      ExtendTokenLength(1);
      result.SetBytesConsumed(1).SetType(DataMatchResult::kFull);
      data_ = sentinel;
      break;
    }
  }
  if (result.GetBytesConsumed() == 0) {
    SetSyntaxError(context, "Expected sentinal character", &result);
  }
  return result;
}
|
|
/// Consumes bytes up to and including the next occurrence of the literal.
/// The data_ member holds the number of leading literal bytes matched so
/// far, so a literal split across several calls can be completed.
///
/// NOTE(review): when a partial match carried over from an earlier call
/// fails, the search restarts at the beginning of the current bytes; bytes
/// consumed by earlier calls are not re-examined, so an occurrence that
/// begins inside them can be missed.
/// @param cbytes The bytes to scan.
/// @param bytes_available The number of bytes available at cbytes.
/// @param context The context used to build error messages.
/// @return A kFull result once the whole literal has been consumed, a
///     kPartialOutOfData result if the available bytes ran out first, or an
///     error result if the literal had already been fully scanned (which
///     includes an empty literal). The result is left as default-constructed
///     when bytes_available is zero.
DataMatchResult DataScanner::ScanThroughLiteral(const char* cbytes,
                                                size_t bytes_available,
                                                const DataContext& context) {
  DataMatchResult result;
  size_t& scanned_literal_length = data_;
  const size_t literal_length = literal_or_sentinels_.length();
  if (scanned_literal_length >= literal_length) {
    SetInternalError(context, "Literal already scanned", &result);
    return result;
  }
  while (bytes_available > 0) {
    if (scanned_literal_length == 0) {
      // Literal scan not in progress. Find the first char of the literal.
      const char* matched_byte = static_cast<const char*>(
          memchr(cbytes, literal_or_sentinels_[0], bytes_available));
      if (matched_byte == nullptr) {
        // First char not found and chars exhausted.
        ExtendTokenLength(bytes_available);
        result.IncrementBytesConsumed(bytes_available);
        result.SetType(DataMatchResult::kPartialOutOfData);
        break;
      }
      // Found the first char of the literal; consume through it.
      const size_t bytes_scanned = (matched_byte - cbytes) + 1;
      result.IncrementBytesConsumed(bytes_scanned);
      bytes_available -= bytes_scanned;
      cbytes += bytes_scanned;
      ExtendTokenLength(bytes_scanned);
      scanned_literal_length = 1;
    }
    // Check if the rest of the literal is there.
    const size_t bytes_still_needed = literal_length - scanned_literal_length;
    const size_t bytes_to_compare =
        std::min(bytes_still_needed, bytes_available);
    // Compare exactly bytes_to_compare bytes. A strncmp here would stop at
    // the first NUL byte and could report a match for a literal containing
    // '\0', unlike the memchr search above, which is length-bounded.
    const bool matches =
        literal_or_sentinels_.compare(scanned_literal_length, bytes_to_compare,
                                      cbytes, bytes_to_compare) == 0;
    if (matches) {
      // Yes, the whole literal is there or chars are exhausted.
      ExtendTokenLength(bytes_to_compare);
      scanned_literal_length += bytes_to_compare;
      result.IncrementBytesConsumed(bytes_to_compare);
      result.SetType(scanned_literal_length == literal_length
                         ? DataMatchResult::kFull
                         : DataMatchResult::kPartialOutOfData);
      break;
    }
    // False alarm: the leading chars of the literal were found, but not all
    // of it. Drop the partial match and keep searching from the current
    // position, which is one past the matched first char when that char was
    // found during this call.
    scanned_literal_length = 0;
  }
  return result;
}
|
|
/// Scans a run of whitespace for both the kWhitespace and the
/// kOptionalWhitespace scanner types.
/// @param cbytes The bytes to scan.
/// @param bytes_available The number of bytes available at cbytes.
/// @param context The context used to build error messages.
/// @return When whitespace was consumed: a kFull result if a non-whitespace
///     byte follows it, else a kPartialOutOfData result. When nothing was
///     consumed: an error result if this is a kWhitespace scanner whose token
///     is still empty, else a kFull result.
DataMatchResult DataScanner::ScanWhitespace(const char* cbytes,
                                            size_t bytes_available,
                                            const DataContext& context) {
  DataMatchResult result;
  result.SetBytesConsumed(ScanWhitespaceChars(cbytes, bytes_available));
  const auto consumed = result.GetBytesConsumed();
  const size_t token_length = ExtendTokenLength(consumed);

  if (consumed != 0) {
    // Some whitespace was found; it is complete only if something else
    // follows it in the available bytes.
    const bool stopped_before_end = consumed < bytes_available;
    result.SetType(stopped_before_end ? DataMatchResult::kFull
                                      : DataMatchResult::kPartialOutOfData);
    return result;
  }

  const bool whitespace_missing = token_length == 0 && type_ == kWhitespace;
  if (whitespace_missing) {
    SetSyntaxError(context, "Expected whitespace", &result);
  } else {
    result.SetType(DataMatchResult::kFull);
  }
  return result;
}
|
|
/// Runs the scan function that corresponds to this scanner's type on the
/// bytes between the context's location and the end of the context's range.
/// Every call increments the scan call count, and an invalid token range is
/// initialized to an empty range at the context's location.
/// @param context The context that supplies the bytes, location and range.
/// @return The result of the type-specific scan function, or an error result
///     if the context's location and range are invalid or the scanner type
///     is not one of the known types.
DataMatchResult DataScanner::Scan(const DataContext& context) {
  ++scan_call_count_;
  DataMatchResult result;
  if (!context.IsValidLocationAndRange()) {
    SetInternalError(context, context.GetInvalidLocationAndRangeErrorText(),
                     &result);
    return result;
  }
  if (!token_range_.IsValid()) {
    // First scan of a token: start with an empty range at the location.
    token_range_ = DataRange(context.GetLocation(), context.GetLocation());
  }
  const size_t bytes_available =
      context.GetRange().GetEnd() - context.GetLocation();
  const char* cbytes = context.GetCharBytes();
  switch (type_) {
    case kLiteral:
      return ScanLiteral(cbytes, bytes_available, context);
    case kName:
      return ScanName(cbytes, bytes_available, context);
    case kQuotedString:
      return ScanQuotedString(cbytes, bytes_available, context);
    case kSentinel:
      return ScanSentinel(cbytes, bytes_available, context);
    case kThroughLiteral:
      return ScanThroughLiteral(cbytes, bytes_available, context);
    case kWhitespace:
    case kOptionalWhitespace:
      return ScanWhitespace(cbytes, bytes_available, context);
    default:
      break;
  }
  SetInternalError(context, "Undefined scanner type", &result);
  return result;
}
|
|
/// Replaces the token range with a default-constructed DataRange.
void DataScanner::ResetTokenRange() {
  token_range_ = DataRange();
}
|
|
/// Returns the scanner to its initial scanning state: the token range is
/// reset, and the scan call count and the per-type scan state are zeroed.
void DataScanner::Reset() {
  ResetTokenRange();
  scan_call_count_ = 0;
  data_ = 0;
}
|
|
std::string DataScanner::GetDescription() const {
|
std::string description;
|
switch (type_) {
|
case kLiteral:
|
description = "Literal:'";
|
description += literal_or_sentinels_;
|
description += "'";
|
break;
|
case kName:
|
description = "Name";
|
break;
|
case kQuotedString:
|
description = "QuotedString";
|
break;
|
case kSentinel:
|
description = "OneOf:'";
|
description += literal_or_sentinels_;
|
description += "'";
|
break;
|
case kThroughLiteral:
|
description = "ThruLiteral:'";
|
description += literal_or_sentinels_;
|
description += "'";
|
break;
|
case kWhitespace:
|
description = "Whitespace";
|
break;
|
case kOptionalWhitespace:
|
description = "OptionalWhitespace";
|
break;
|
}
|
return description;
|
}
|
|
std::string DataScanner::GetLiteral() const {
|
return type_ == kLiteral || type_ == kThroughLiteral ? literal_or_sentinels_
|
: "";
|
}
|
|
std::string DataScanner::GetSentenels() const {
|
return type_ == kSentinel ? literal_or_sentinels_ : "";
|
}
|
|
/// @return The value of data_ converted to a char if this is a kSentinel
///     scanner (ScanSentinel stores the matched sentinel there), else 0.
char DataScanner::GetSentinel() const {
  if (type_ != kSentinel) {
    return 0;
  }
  return static_cast<char>(data_);
}
|
|
} // namespace image_io
|
} // namespace photos_editing_formats
|