// Copyright 2017 Google Inc. All rights reserved.
|
//
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
// you may not use this file except in compliance with the License.
|
// You may obtain a copy of the License at
|
//
|
// http://www.apache.org/licenses/LICENSE-2.0
|
//
|
// Unless required by applicable law or agreed to in writing, software
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
// See the License for the specific language governing permissions and
|
// limitations under the License.
|
|
#include "src/utf8_fix.h"
|
|
#include <algorithm>
|
#include <cassert>
|
|
namespace protobuf_mutator {
|
|
namespace {
|
|
// Encodes `code` as a UTF-8 sequence of `size` bytes, written backwards so
// that the last byte lands immediately before `e`.
//
// Every byte except the first is emitted as a continuation byte (0x80 plus six
// payload bits). The first byte is `prefix` OR-ed with whatever bits of `code`
// are left after the continuation bytes have been peeled off.
void StoreCode(char* e, char32_t code, uint8_t size, uint8_t prefix) {
  char* cursor = e;

  // All bytes but the leading one are continuation bytes, emitted from the
  // least significant six bits upwards.
  for (uint8_t trailing = static_cast<uint8_t>(size - 1); trailing != 0;
       --trailing) {
    const char32_t payload = code & 0x3F;
    --cursor;
    *cursor = 0x80 | payload;
    code >>= 6;
  }

  // Leading byte: length marker combined with the remaining high bits.
  --cursor;
  *cursor = prefix | code;
}
|
|
char* FixCode(char* b, const char* e, RandomEngine* random) {
|
const char* start = b;
|
assert(b < e);
|
|
e = std::min<const char*>(e, b + 4);
|
char32_t c = *b++;
|
for (; b < e && (*b & 0xC0) == 0x80; ++b) {
|
c = (c << 6) + (*b & 0x3F);
|
}
|
uint8_t size = b - start;
|
switch (size) {
|
case 1:
|
c &= 0x7F;
|
StoreCode(b, c, size, 0);
|
break;
|
case 2:
|
c &= 0x7FF;
|
if (c < 0x80) {
|
c = std::uniform_int_distribution<char32_t>(0x80, 0x7FF)(*random);
|
}
|
StoreCode(b, c, size, 0xC0);
|
break;
|
case 3:
|
c &= 0xFFFF;
|
|
// [0xD800, 0xE000) are reserved for UTF-16 surrogate halves.
|
if (c < 0x800 || (c >= 0xD800 && c < 0xE000)) {
|
uint32_t halves = 0xE000 - 0xD800;
|
c = std::uniform_int_distribution<char32_t>(0x800,
|
0xFFFF - halves)(*random);
|
if (c >= 0xD800) c += halves;
|
}
|
StoreCode(b, c, size, 0xE0);
|
break;
|
case 4:
|
c &= 0x1FFFFF;
|
if (c < 0x10000 || c > 0x10FFFF) {
|
c = std::uniform_int_distribution<char32_t>(0x10000, 0x10FFFF)(*random);
|
}
|
StoreCode(b, c, size, 0xF0);
|
break;
|
default:
|
assert(false && "Unexpected size of UTF-8 sequence");
|
}
|
return b;
|
}
|
|
} // namespace
|
|
void FixUtf8String(std::string* str, RandomEngine* random) {
|
if (str->empty()) return;
|
char* b = &(*str)[0];
|
const char* e = b + str->size();
|
while (b < e) {
|
b = FixCode(b, e, random);
|
}
|
}
|
|
} // namespace protobuf_mutator
|