/*
|
* Copyright (C) 2017 The Android Open Source Project
|
*
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
* you may not use this file except in compliance with the License.
|
* You may obtain a copy of the License at
|
*
|
* http://www.apache.org/licenses/LICENSE-2.0
|
*
|
* Unless required by applicable law or agreed to in writing, software
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* See the License for the specific language governing permissions and
|
* limitations under the License.
|
*/
|
|
#define LOG_TAG "GreedyLineBreak"
|
|
#include "minikin/Characters.h"
|
#include "minikin/LineBreaker.h"
|
#include "minikin/MeasuredText.h"
|
#include "minikin/Range.h"
|
#include "minikin/U16StringPiece.h"
|
|
#include "HyphenatorMap.h"
|
#include "LineBreakerUtil.h"
|
#include "Locale.h"
|
#include "LocaleListCache.h"
|
#include "WordBreaker.h"
|
|
namespace minikin {
|
|
namespace {
|
|
constexpr uint32_t NOWHERE = 0xFFFFFFFF;
|
|
class GreedyLineBreaker {
|
public:
|
// User of this class must keep measured, lineWidthLimit, tabStop alive until the instance is
|
// destructed.
|
GreedyLineBreaker(const U16StringPiece& textBuf, const MeasuredText& measured,
|
const LineWidth& lineWidthLimits, const TabStops& tabStops,
|
bool enableHyphenation)
|
: mLineWidthLimit(lineWidthLimits.getAt(0)),
|
mTextBuf(textBuf),
|
mMeasuredText(measured),
|
mLineWidthLimits(lineWidthLimits),
|
mTabStops(tabStops),
|
mEnableHyphenation(enableHyphenation) {}
|
|
void process();
|
|
LineBreakResult getResult() const;
|
|
private:
|
struct BreakPoint {
|
BreakPoint(uint32_t offset, float lineWidth, StartHyphenEdit startHyphen,
|
EndHyphenEdit endHyphen)
|
: offset(offset),
|
lineWidth(lineWidth),
|
hyphenEdit(packHyphenEdit(startHyphen, endHyphen)) {}
|
|
uint32_t offset;
|
float lineWidth;
|
HyphenEdit hyphenEdit;
|
};
|
|
inline uint32_t getPrevLineBreakOffset() {
|
return mBreakPoints.empty() ? 0 : mBreakPoints.back().offset;
|
}
|
|
// Registers the break point and prepares for next line computation.
|
void breakLineAt(uint32_t offset, float lineWidth, float remainingNextLineWidth,
|
float remainingNextSumOfCharWidths, EndHyphenEdit thisLineEndHyphen,
|
StartHyphenEdit nextLineStartHyphen);
|
|
// Update current line width.
|
void updateLineWidth(uint16_t c, float width);
|
|
// Break line if current line exceeds the line limit.
|
void processLineBreak(uint32_t offset, WordBreaker* breaker, bool doHyphenation);
|
|
// Try to break with previous word boundary.
|
// Returns false if unable to break by word boundary.
|
bool tryLineBreakWithWordBreak();
|
|
// Try to break with hyphenation.
|
// Returns false if unable to hyphenate.
|
//
|
// This method keeps hyphenation until the line width after line break meets the line width
|
// limit.
|
bool tryLineBreakWithHyphenation(const Range& range, WordBreaker* breaker);
|
|
// Do line break with each characters.
|
//
|
// This method only breaks at the first offset which has the longest width for the line width
|
// limit. This method don't keep line breaking even if the rest of the word exceeds the line
|
// width limit.
|
// This method return true if there is no characters to be processed.
|
bool doLineBreakWithGraphemeBounds(const Range& range);
|
|
// Info about the line currently processing.
|
uint32_t mLineNum = 0;
|
double mLineWidth = 0;
|
double mSumOfCharWidths = 0;
|
double mLineWidthLimit;
|
StartHyphenEdit mStartHyphenEdit = StartHyphenEdit::NO_EDIT;
|
|
// Previous word break point info.
|
uint32_t mPrevWordBoundsOffset = NOWHERE;
|
double mLineWidthAtPrevWordBoundary = 0;
|
double mSumOfCharWidthsAtPrevWordBoundary = 0;
|
bool mIsPrevWordBreakIsInEmailOrUrl = false;
|
|
// The hyphenator currently used.
|
const Hyphenator* mHyphenator = nullptr;
|
|
// Input parameters.
|
const U16StringPiece& mTextBuf;
|
const MeasuredText& mMeasuredText;
|
const LineWidth& mLineWidthLimits;
|
const TabStops& mTabStops;
|
bool mEnableHyphenation;
|
|
// The result of line breaking.
|
std::vector<BreakPoint> mBreakPoints;
|
|
MINIKIN_PREVENT_COPY_ASSIGN_AND_MOVE(GreedyLineBreaker);
|
};
|
|
void GreedyLineBreaker::breakLineAt(uint32_t offset, float lineWidth, float remainingNextLineWidth,
|
float remainingNextSumOfCharWidths,
|
EndHyphenEdit thisLineEndHyphen,
|
StartHyphenEdit nextLineStartHyphen) {
|
// First, push the break to result.
|
mBreakPoints.emplace_back(offset, lineWidth, mStartHyphenEdit, thisLineEndHyphen);
|
|
// Update the current line info.
|
mLineWidthLimit = mLineWidthLimits.getAt(++mLineNum);
|
mLineWidth = remainingNextLineWidth;
|
mSumOfCharWidths = remainingNextSumOfCharWidths;
|
mStartHyphenEdit = nextLineStartHyphen;
|
mPrevWordBoundsOffset = NOWHERE;
|
mLineWidthAtPrevWordBoundary = 0;
|
mSumOfCharWidthsAtPrevWordBoundary = 0;
|
mIsPrevWordBreakIsInEmailOrUrl = false;
|
}
|
|
bool GreedyLineBreaker::tryLineBreakWithWordBreak() {
|
if (mPrevWordBoundsOffset == NOWHERE) {
|
return false; // No word break point before..
|
}
|
|
breakLineAt(mPrevWordBoundsOffset, // break offset
|
mLineWidthAtPrevWordBoundary, // line width
|
mLineWidth - mSumOfCharWidthsAtPrevWordBoundary, // remaining next line width
|
// remaining next sum of char widths.
|
mSumOfCharWidths - mSumOfCharWidthsAtPrevWordBoundary, EndHyphenEdit::NO_EDIT,
|
StartHyphenEdit::NO_EDIT); // No hyphen modification.
|
return true;
|
}
|
|
bool GreedyLineBreaker::tryLineBreakWithHyphenation(const Range& range, WordBreaker* breaker) {
|
if (!mEnableHyphenation || mHyphenator == nullptr) {
|
return false;
|
}
|
|
Run* targetRun = nullptr;
|
for (const auto& run : mMeasuredText.runs) {
|
if (run->getRange().contains(range)) {
|
targetRun = run.get();
|
}
|
}
|
|
if (targetRun == nullptr) {
|
return false; // The target range may lay on multiple run. Unable to hyphenate.
|
}
|
|
const Range targetRange = breaker->wordRange();
|
if (!range.contains(targetRange)) {
|
return false;
|
}
|
|
const std::vector<HyphenationType> hyphenResult =
|
hyphenate(mTextBuf.substr(targetRange), *mHyphenator);
|
Range contextRange = range;
|
uint32_t prevOffset = NOWHERE;
|
float prevWidth = 0;
|
|
// Look up the hyphenation point from the begining.
|
for (uint32_t i = targetRange.getStart(); i < targetRange.getEnd(); ++i) {
|
const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(i)];
|
if (hyph == HyphenationType::DONT_BREAK) {
|
continue; // Not a hyphenation point.
|
}
|
|
const float width =
|
targetRun->measureHyphenPiece(mTextBuf, contextRange.split(i).first,
|
mStartHyphenEdit, editForThisLine(hyph), nullptr);
|
|
if (width <= mLineWidthLimit) {
|
// There are still space, remember current offset and look up next hyphenation point.
|
prevOffset = i;
|
prevWidth = width;
|
continue;
|
}
|
|
if (prevOffset == NOWHERE) {
|
// Even with hyphenation, the piece is too long for line. Give up and break in
|
// character bounds.
|
doLineBreakWithGraphemeBounds(contextRange);
|
} else {
|
// Previous offset is the longest hyphenation piece. Break with it.
|
const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(prevOffset)];
|
const StartHyphenEdit nextLineStartHyphenEdit = editForNextLine(hyph);
|
const float remainingCharWidths = targetRun->measureHyphenPiece(
|
mTextBuf, contextRange.split(prevOffset).second, nextLineStartHyphenEdit,
|
EndHyphenEdit::NO_EDIT, nullptr);
|
breakLineAt(prevOffset, prevWidth,
|
remainingCharWidths - (mSumOfCharWidths - mLineWidth), remainingCharWidths,
|
editForThisLine(hyph), nextLineStartHyphenEdit);
|
}
|
|
if (mLineWidth <= mLineWidthLimit) {
|
// The remaining hyphenation piece is less than line width. No more hyphenation is
|
// needed. Go to next word.
|
return true;
|
}
|
|
// Even after line break, the remaining hyphenation piece is still too long for the limit.
|
// Keep hyphenating for the rest.
|
i = getPrevLineBreakOffset();
|
contextRange.setStart(i); // Update the hyphenation start point.
|
prevOffset = NOWHERE;
|
}
|
|
// Do the same line break at the end of text.
|
// TODO: Remove code duplication. This is the same as in the for loop but extracting function
|
// may not clear.
|
if (prevOffset == NOWHERE) {
|
doLineBreakWithGraphemeBounds(contextRange);
|
} else {
|
const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(prevOffset)];
|
const StartHyphenEdit nextLineStartHyphenEdit = editForNextLine(hyph);
|
const float remainingCharWidths = targetRun->measureHyphenPiece(
|
mTextBuf, contextRange.split(prevOffset).second, nextLineStartHyphenEdit,
|
EndHyphenEdit::NO_EDIT, nullptr);
|
|
breakLineAt(prevOffset, prevWidth, remainingCharWidths - (mSumOfCharWidths - mLineWidth),
|
remainingCharWidths, editForThisLine(hyph), nextLineStartHyphenEdit);
|
}
|
|
return true;
|
}
|
|
// TODO: Respect trailing line end spaces.
|
bool GreedyLineBreaker::doLineBreakWithGraphemeBounds(const Range& range) {
|
double width = mMeasuredText.widths[range.getStart()];
|
|
// Starting from + 1 since at least one character needs to be assigned to a line.
|
for (uint32_t i = range.getStart() + 1; i < range.getEnd(); ++i) {
|
const float w = mMeasuredText.widths[i];
|
if (w == 0) {
|
continue; // w == 0 means here is not a grapheme bounds. Don't break here.
|
}
|
if (width + w > mLineWidthLimit) {
|
// Okay, here is the longest position.
|
breakLineAt(i, width, mLineWidth - width, mSumOfCharWidths - width,
|
EndHyphenEdit::NO_EDIT, StartHyphenEdit::NO_EDIT);
|
|
// This method only breaks at the first longest offset, since we may want to hyphenate
|
// the rest of the word.
|
return false;
|
} else {
|
width += w;
|
}
|
}
|
|
// Reaching here means even one character (or cluster) doesn't fit the line.
|
// Give up and break at the end of this range.
|
breakLineAt(range.getEnd(), mLineWidth, 0, 0, EndHyphenEdit::NO_EDIT, StartHyphenEdit::NO_EDIT);
|
return true;
|
}
|
|
void GreedyLineBreaker::updateLineWidth(uint16_t c, float width) {
|
if (c == CHAR_TAB) {
|
mSumOfCharWidths = mTabStops.nextTab(mSumOfCharWidths);
|
mLineWidth = mSumOfCharWidths;
|
} else {
|
mSumOfCharWidths += width;
|
if (!isLineEndSpace(c)) {
|
mLineWidth = mSumOfCharWidths;
|
}
|
}
|
}
|
|
void GreedyLineBreaker::processLineBreak(uint32_t offset, WordBreaker* breaker,
|
bool doHyphenation) {
|
while (mLineWidth > mLineWidthLimit) {
|
const Range lineRange(getPrevLineBreakOffset(), offset); // The range we need to address.
|
if (tryLineBreakWithWordBreak()) {
|
continue; // The word in the new line may still be too long for the line limit.
|
} else if (doHyphenation && tryLineBreakWithHyphenation(lineRange, breaker)) {
|
continue; // TODO: we may be able to return here.
|
} else {
|
if (doLineBreakWithGraphemeBounds(lineRange)) {
|
return;
|
}
|
}
|
}
|
|
// There is still spaces, remember current word break point as a candidate and wait next word.
|
const bool isInEmailOrUrl = breaker->breakBadness() != 0;
|
if (mPrevWordBoundsOffset == NOWHERE || mIsPrevWordBreakIsInEmailOrUrl | !isInEmailOrUrl) {
|
mPrevWordBoundsOffset = offset;
|
mLineWidthAtPrevWordBoundary = mLineWidth;
|
mSumOfCharWidthsAtPrevWordBoundary = mSumOfCharWidths;
|
mIsPrevWordBreakIsInEmailOrUrl = isInEmailOrUrl;
|
}
|
}
|
|
void GreedyLineBreaker::process() {
|
WordBreaker wordBreaker;
|
wordBreaker.setText(mTextBuf.data(), mTextBuf.size());
|
|
// Following two will be initialized after the first iteration.
|
uint32_t localeListId = LocaleListCache::kInvalidListId;
|
uint32_t nextWordBoundaryOffset = 0;
|
for (const auto& run : mMeasuredText.runs) {
|
const Range range = run->getRange();
|
|
// Update locale if necessary.
|
uint32_t newLocaleListId = run->getLocaleListId();
|
if (localeListId != newLocaleListId) {
|
Locale locale = getEffectiveLocale(newLocaleListId);
|
nextWordBoundaryOffset = wordBreaker.followingWithLocale(locale, range.getStart());
|
mHyphenator = HyphenatorMap::lookup(locale);
|
localeListId = newLocaleListId;
|
}
|
|
for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
|
updateLineWidth(mTextBuf[i], mMeasuredText.widths[i]);
|
|
if ((i + 1) == nextWordBoundaryOffset) {
|
// Only process line break at word boundary and the run can break into some pieces.
|
if (run->canBreak() || nextWordBoundaryOffset == range.getEnd()) {
|
processLineBreak(i + 1, &wordBreaker, run->canBreak());
|
}
|
nextWordBoundaryOffset = wordBreaker.next();
|
}
|
}
|
}
|
|
if (getPrevLineBreakOffset() != mTextBuf.size() && mPrevWordBoundsOffset != NOWHERE) {
|
// The remaining words in the last line.
|
breakLineAt(mPrevWordBoundsOffset, mLineWidth, 0, 0, EndHyphenEdit::NO_EDIT,
|
StartHyphenEdit::NO_EDIT);
|
}
|
}
|
|
LineBreakResult GreedyLineBreaker::getResult() const {
|
constexpr int TAB_BIT = 1 << 29; // Must be the same in StaticLayout.java
|
|
LineBreakResult out;
|
uint32_t prevBreakOffset = 0;
|
for (const auto& breakPoint : mBreakPoints) {
|
// TODO: compute these during line breaking if these takes longer time.
|
bool hasTabChar = false;
|
for (uint32_t i = prevBreakOffset; i < breakPoint.offset; ++i) {
|
hasTabChar |= mTextBuf[i] == CHAR_TAB;
|
}
|
|
MinikinExtent extent =
|
mMeasuredText.getExtent(mTextBuf, Range(prevBreakOffset, breakPoint.offset));
|
out.breakPoints.push_back(breakPoint.offset);
|
out.widths.push_back(breakPoint.lineWidth);
|
out.ascents.push_back(extent.ascent);
|
out.descents.push_back(extent.descent);
|
out.flags.push_back((hasTabChar ? TAB_BIT : 0) | static_cast<int>(breakPoint.hyphenEdit));
|
|
prevBreakOffset = breakPoint.offset;
|
}
|
return out;
|
}
|
|
} // namespace
|
|
LineBreakResult breakLineGreedy(const U16StringPiece& textBuf, const MeasuredText& measured,
|
const LineWidth& lineWidthLimits, const TabStops& tabStops,
|
bool enableHyphenation) {
|
if (textBuf.size() == 0) {
|
return LineBreakResult();
|
}
|
GreedyLineBreaker lineBreaker(textBuf, measured, lineWidthLimits, tabStops, enableHyphenation);
|
lineBreaker.process();
|
return lineBreaker.getResult();
|
}
|
|
} // namespace minikin
|