// Copyright 2017 PDFium Authors. All rights reserved.
|
// Use of this source code is governed by a BSD-style license that can be
|
// found in the LICENSE file.
|
|
#include <map>
|
#include <memory>
|
#include <utility>
|
#include <vector>
|
|
#include "core/fpdfapi/cpdf_modulemgr.h"
|
#include "core/fpdfapi/font/cpdf_font.h"
|
#include "core/fpdfapi/font/cpdf_type1font.h"
|
#include "core/fpdfapi/page/cpdf_docpagedata.h"
|
#include "core/fpdfapi/page/cpdf_textobject.h"
|
#include "core/fpdfapi/parser/cpdf_array.h"
|
#include "core/fpdfapi/parser/cpdf_dictionary.h"
|
#include "core/fpdfapi/parser/cpdf_document.h"
|
#include "core/fpdfapi/parser/cpdf_name.h"
|
#include "core/fpdfapi/parser/cpdf_number.h"
|
#include "core/fpdfapi/parser/cpdf_reference.h"
|
#include "core/fpdfapi/parser/cpdf_stream.h"
|
#include "core/fxcrt/fx_extension.h"
|
#include "core/fxge/cfx_fontmgr.h"
|
#include "core/fxge/fx_font.h"
|
#include "fpdfsdk/fsdk_define.h"
|
#include "public/fpdf_edit.h"
|
|
namespace {
|
|
// Creates the /FontDescriptor dictionary for a font that is being embedded.
//
// A new indirect dictionary is added to |pDoc| and filled with the name, flags
// and metrics read from |pFont|. The |size| bytes at |data| are handed to a new
// indirect stream, which the descriptor references as "FontFile" when
// |font_type| is FPDF_FONT_TYPE1 and as "FontFile2" otherwise. Returns the
// descriptor dictionary.
CPDF_Dictionary* LoadFontDesc(CPDF_Document* pDoc,
                              const ByteString& font_name,
                              CFX_Font* pFont,
                              const uint8_t* data,
                              uint32_t size,
                              int font_type) {
  CPDF_Dictionary* pDescriptor = pDoc->NewIndirect<CPDF_Dictionary>();
  pDescriptor->SetNewFor<CPDF_Name>("Type", "FontDescriptor");
  pDescriptor->SetNewFor<CPDF_Name>("FontName", font_name);

  // TODO(npm): How do I know if a font is symbolic, script, allcap, smallcap
  // Every font is currently flagged as non-symbolic; the remaining bits come
  // from the face and from the font name.
  int font_flags = FXFONT_NONSYMBOLIC;
  if (FXFT_Is_Face_fixedwidth(pFont->GetFace()))
    font_flags |= FXFONT_FIXED_PITCH;
  if (font_name.Contains("Serif"))
    font_flags |= FXFONT_SERIF;
  if (FXFT_Is_Face_Italic(pFont->GetFace()))
    font_flags |= FXFONT_ITALIC;
  if (FXFT_Is_Face_Bold(pFont->GetFace()))
    font_flags |= FXFONT_BOLD;
  pDescriptor->SetNewFor<CPDF_Number>("Flags", font_flags);

  // FontBBox is written in the order left, top, right, bottom.
  FX_RECT font_box;
  pFont->GetBBox(font_box);
  auto pBoxArray = pdfium::MakeUnique<CPDF_Array>();
  pBoxArray->AddNew<CPDF_Number>(font_box.left);
  pBoxArray->AddNew<CPDF_Number>(font_box.top);
  pBoxArray->AddNew<CPDF_Number>(font_box.right);
  pBoxArray->AddNew<CPDF_Number>(font_box.bottom);
  pDescriptor->SetFor("FontBBox", std::move(pBoxArray));

  // TODO(npm): calculate italic angle correctly
  const int italic_angle = pFont->IsItalic() ? -12 : 0;
  pDescriptor->SetNewFor<CPDF_Number>("ItalicAngle", italic_angle);

  pDescriptor->SetNewFor<CPDF_Number>("Ascent", pFont->GetAscent());
  pDescriptor->SetNewFor<CPDF_Number>("Descent", pFont->GetDescent());

  // TODO(npm): calculate the capheight, stemV correctly
  // Until then the ascent stands in for the cap height and StemV is a fixed
  // guess that depends only on boldness.
  pDescriptor->SetNewFor<CPDF_Number>("CapHeight", pFont->GetAscent());
  const int stem_v = pFont->IsBold() ? 120 : 70;
  pDescriptor->SetNewFor<CPDF_Number>("StemV", stem_v);

  CPDF_Stream* pFontStream = pDoc->NewIndirect<CPDF_Stream>();
  pFontStream->SetData(data, size);
  // TODO(npm): Lengths for Type1 fonts.
  const bool is_truetype = font_type == FPDF_FONT_TRUETYPE;
  if (is_truetype) {
    pFontStream->GetDict()->SetNewFor<CPDF_Number>("Length1",
                                                   static_cast<int>(size));
  }

  ByteString font_file_key;
  if (font_type == FPDF_FONT_TYPE1)
    font_file_key = "FontFile";
  else
    font_file_key = "FontFile2";
  pDescriptor->SetNewFor<CPDF_Reference>(font_file_key, pDoc,
                                         pFontStream->GetObjNum());
  return pDescriptor;
}
|
|
// Fixed preamble of every ToUnicode CMap stream written by LoadUnicode().
//
// It declares the CIDSystemInfo, the CMap name and type, and a single
// two-byte code space covering <0000> through <FFFF>. Both bounds of the code
// space must have the same number of hex digits, and /CMapType must be a name
// literal (leading slash) for the "def" to define it.
const char ToUnicodeStart[] =
    "/CIDInit /ProcSet findresource begin\n"
    "12 dict begin\n"
    "begincmap\n"
    "/CIDSystemInfo\n"
    "<</Registry (Adobe)\n"
    "/Ordering (Identity)\n"
    "/Supplement 0\n"
    ">> def\n"
    "/CMapName /Adobe-Identity-H def\n"
    "/CMapType 2 def\n"
    "1 begincodespacerange\n"
    "<0000> <FFFF>\n"
    "endcodespacerange\n";
|
|
// Fixed trailer of every ToUnicode CMap stream written by LoadUnicode():
// closes the cmap, registers it as a /CMap resource and ends the two
// dictionaries opened by ToUnicodeStart.
const char ToUnicodeEnd[] =
    "endcmap\n"
    "CMapName currentdict /CMap defineresource pop\n"
    "end\nend\n";
|
|
// Appends |number| to |pBuffer| as a hex-string token: '<', the four
// characters produced by FXSYS_IntToFourHexChars(), then '>'. |number| must
// not exceed 0xFFFF (enforced by ASSERT).
void AddCharcode(std::ostringstream* pBuffer, uint32_t number) {
  ASSERT(number <= 0xFFFF);
  char hex_digits[4];
  FXSYS_IntToFourHexChars(number, hex_digits);

  *pBuffer << "<";
  // The digit buffer is not NUL-terminated, so emit it character by character.
  for (char digit : hex_digits)
    *pBuffer << digit;
  *pBuffer << ">";
}
|
|
// PDF spec 1.7 Section 5.9.2: "Unicode character sequences as expressed in
|
// UTF-16BE encoding." See https://en.wikipedia.org/wiki/UTF-16#Description
|
void AddUnicode(std::ostringstream* pBuffer, uint32_t unicode) {
|
if (unicode >= 0xD800 && unicode <= 0xDFFF)
|
unicode = 0;
|
|
char ans[8];
|
*pBuffer << "<";
|
size_t numChars = FXSYS_ToUTF16BE(unicode, ans);
|
for (size_t i = 0; i < numChars; ++i)
|
*pBuffer << ans[i];
|
*pBuffer << ">";
|
}
|
|
// Loads the charcode to unicode mapping into a stream
|
CPDF_Stream* LoadUnicode(CPDF_Document* pDoc,
|
const std::map<uint32_t, uint32_t>& to_unicode) {
|
// A map charcode->unicode
|
std::map<uint32_t, uint32_t> char_to_uni;
|
// A map <char_start, char_end> to vector v of unicode characters of size (end
|
// - start + 1). This abbreviates: start->v[0], start+1->v[1], etc. PDF spec
|
// 1.7 Section 5.9.2 says that only the last byte of the unicode may change.
|
std::map<std::pair<uint32_t, uint32_t>, std::vector<uint32_t>>
|
map_range_vector;
|
// A map <start, end> -> unicode
|
// This abbreviates: start->unicode, start+1->unicode+1, etc.
|
// PDF spec 1.7 Section 5.9.2 says that only the last byte of the unicode may
|
// change.
|
std::map<std::pair<uint32_t, uint32_t>, uint32_t> map_range;
|
|
// Calculate the maps
|
for (auto iter = to_unicode.begin(); iter != to_unicode.end(); ++iter) {
|
uint32_t firstCharcode = iter->first;
|
uint32_t firstUnicode = iter->second;
|
if (std::next(iter) == to_unicode.end() ||
|
firstCharcode + 1 != std::next(iter)->first) {
|
char_to_uni[firstCharcode] = firstUnicode;
|
continue;
|
}
|
++iter;
|
uint32_t curCharcode = iter->first;
|
uint32_t curUnicode = iter->second;
|
if (curCharcode % 256 == 0) {
|
char_to_uni[firstCharcode] = firstUnicode;
|
char_to_uni[curCharcode] = curUnicode;
|
continue;
|
}
|
const size_t maxExtra = 255 - (curCharcode % 256);
|
auto next_it = std::next(iter);
|
if (firstUnicode + 1 != curUnicode) {
|
// Consecutive charcodes mapping to non-consecutive unicodes
|
std::vector<uint32_t> unicodes;
|
unicodes.push_back(firstUnicode);
|
unicodes.push_back(curUnicode);
|
for (size_t i = 0; i < maxExtra; ++i) {
|
if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first)
|
break;
|
++iter;
|
++curCharcode;
|
unicodes.push_back(iter->second);
|
next_it = std::next(iter);
|
}
|
ASSERT(iter->first - firstCharcode + 1 == unicodes.size());
|
map_range_vector[std::make_pair(firstCharcode, iter->first)] = unicodes;
|
continue;
|
}
|
// Consecutive charcodes mapping to consecutive unicodes
|
for (size_t i = 0; i < maxExtra; ++i) {
|
if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first ||
|
curUnicode + 1 != next_it->second) {
|
break;
|
}
|
++iter;
|
++curCharcode;
|
++curUnicode;
|
next_it = std::next(iter);
|
}
|
map_range[std::make_pair(firstCharcode, curCharcode)] = firstUnicode;
|
}
|
std::ostringstream buffer;
|
buffer << ToUnicodeStart;
|
// Add maps to buffer
|
buffer << static_cast<uint32_t>(char_to_uni.size()) << " beginbfchar\n";
|
for (const auto& iter : char_to_uni) {
|
AddCharcode(&buffer, iter.first);
|
buffer << " ";
|
AddUnicode(&buffer, iter.second);
|
buffer << "\n";
|
}
|
buffer << "endbfchar\n"
|
<< static_cast<uint32_t>(map_range_vector.size() + map_range.size())
|
<< " beginbfrange\n";
|
for (const auto& iter : map_range_vector) {
|
const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first;
|
AddCharcode(&buffer, charcodeRange.first);
|
buffer << " ";
|
AddCharcode(&buffer, charcodeRange.second);
|
buffer << " [";
|
const std::vector<uint32_t>& unicodes = iter.second;
|
for (size_t i = 0; i < unicodes.size(); ++i) {
|
uint32_t uni = unicodes[i];
|
AddUnicode(&buffer, uni);
|
if (i != unicodes.size() - 1)
|
buffer << " ";
|
}
|
buffer << "]\n";
|
}
|
for (const auto& iter : map_range) {
|
const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first;
|
AddCharcode(&buffer, charcodeRange.first);
|
buffer << " ";
|
AddCharcode(&buffer, charcodeRange.second);
|
buffer << " ";
|
AddUnicode(&buffer, iter.second);
|
buffer << "\n";
|
}
|
buffer << "endbfrange\n";
|
buffer << ToUnicodeEnd;
|
// TODO(npm): Encrypt / Compress?
|
CPDF_Stream* stream = pDoc->NewIndirect<CPDF_Stream>();
|
stream->SetData(&buffer);
|
return stream;
|
}
|
|
const uint32_t kMaxSimpleFontChar = 0xFF;
|
|
void* LoadSimpleFont(CPDF_Document* pDoc,
|
std::unique_ptr<CFX_Font> pFont,
|
const uint8_t* data,
|
uint32_t size,
|
int font_type) {
|
CPDF_Dictionary* fontDict = pDoc->NewIndirect<CPDF_Dictionary>();
|
fontDict->SetNewFor<CPDF_Name>("Type", "Font");
|
fontDict->SetNewFor<CPDF_Name>(
|
"Subtype", font_type == FPDF_FONT_TYPE1 ? "Type1" : "TrueType");
|
ByteString name = pFont->GetFaceName();
|
if (name.IsEmpty())
|
name = "Unnamed";
|
fontDict->SetNewFor<CPDF_Name>("BaseFont", name);
|
|
uint32_t glyphIndex;
|
uint32_t currentChar = FXFT_Get_First_Char(pFont->GetFace(), &glyphIndex);
|
if (currentChar > kMaxSimpleFontChar || glyphIndex == 0)
|
return nullptr;
|
fontDict->SetNewFor<CPDF_Number>("FirstChar", static_cast<int>(currentChar));
|
CPDF_Array* widthsArray = pDoc->NewIndirect<CPDF_Array>();
|
while (true) {
|
widthsArray->AddNew<CPDF_Number>(pFont->GetGlyphWidth(glyphIndex));
|
uint32_t nextChar =
|
FXFT_Get_Next_Char(pFont->GetFace(), currentChar, &glyphIndex);
|
// Simple fonts have 1-byte charcodes only.
|
if (nextChar > kMaxSimpleFontChar || glyphIndex == 0)
|
break;
|
for (uint32_t i = currentChar + 1; i < nextChar; i++)
|
widthsArray->AddNew<CPDF_Number>(0);
|
currentChar = nextChar;
|
}
|
fontDict->SetNewFor<CPDF_Number>("LastChar", static_cast<int>(currentChar));
|
fontDict->SetNewFor<CPDF_Reference>("Widths", pDoc, widthsArray->GetObjNum());
|
CPDF_Dictionary* fontDesc =
|
LoadFontDesc(pDoc, name, pFont.get(), data, size, font_type);
|
|
fontDict->SetNewFor<CPDF_Reference>("FontDescriptor", pDoc,
|
fontDesc->GetObjNum());
|
return pDoc->LoadFont(fontDict);
|
}
|
|
const uint32_t kMaxUnicode = 0x10FFFF;
|
|
void* LoadCompositeFont(CPDF_Document* pDoc,
|
std::unique_ptr<CFX_Font> pFont,
|
const uint8_t* data,
|
uint32_t size,
|
int font_type) {
|
CPDF_Dictionary* fontDict = pDoc->NewIndirect<CPDF_Dictionary>();
|
fontDict->SetNewFor<CPDF_Name>("Type", "Font");
|
fontDict->SetNewFor<CPDF_Name>("Subtype", "Type0");
|
// TODO(npm): Get the correct encoding, if it's not identity.
|
ByteString encoding = "Identity-H";
|
fontDict->SetNewFor<CPDF_Name>("Encoding", encoding);
|
ByteString name = pFont->GetFaceName();
|
if (name.IsEmpty())
|
name = "Unnamed";
|
fontDict->SetNewFor<CPDF_Name>(
|
"BaseFont", font_type == FPDF_FONT_TYPE1 ? name + "-" + encoding : name);
|
|
CPDF_Dictionary* pCIDFont = pDoc->NewIndirect<CPDF_Dictionary>();
|
pCIDFont->SetNewFor<CPDF_Name>("Type", "Font");
|
pCIDFont->SetNewFor<CPDF_Name>("Subtype", font_type == FPDF_FONT_TYPE1
|
? "CIDFontType0"
|
: "CIDFontType2");
|
pCIDFont->SetNewFor<CPDF_Name>("BaseFont", name);
|
|
// TODO(npm): Maybe use FT_Get_CID_Registry_Ordering_Supplement to get the
|
// CIDSystemInfo
|
CPDF_Dictionary* pCIDSystemInfo = pDoc->NewIndirect<CPDF_Dictionary>();
|
pCIDSystemInfo->SetNewFor<CPDF_Name>("Registry", "Adobe");
|
pCIDSystemInfo->SetNewFor<CPDF_Name>("Ordering", "Identity");
|
pCIDSystemInfo->SetNewFor<CPDF_Number>("Supplement", 0);
|
pCIDFont->SetNewFor<CPDF_Reference>("CIDSystemInfo", pDoc,
|
pCIDSystemInfo->GetObjNum());
|
|
CPDF_Dictionary* fontDesc =
|
LoadFontDesc(pDoc, name, pFont.get(), data, size, font_type);
|
pCIDFont->SetNewFor<CPDF_Reference>("FontDescriptor", pDoc,
|
fontDesc->GetObjNum());
|
|
uint32_t glyphIndex;
|
uint32_t currentChar = FXFT_Get_First_Char(pFont->GetFace(), &glyphIndex);
|
// If it doesn't have a single char, just fail
|
if (glyphIndex == 0 || currentChar > kMaxUnicode)
|
return nullptr;
|
|
std::map<uint32_t, uint32_t> to_unicode;
|
std::map<uint32_t, uint32_t> widths;
|
while (true) {
|
if (currentChar > kMaxUnicode)
|
break;
|
|
widths[glyphIndex] = pFont->GetGlyphWidth(glyphIndex);
|
to_unicode[glyphIndex] = currentChar;
|
currentChar =
|
FXFT_Get_Next_Char(pFont->GetFace(), currentChar, &glyphIndex);
|
if (glyphIndex == 0)
|
break;
|
}
|
CPDF_Array* widthsArray = pDoc->NewIndirect<CPDF_Array>();
|
for (auto it = widths.begin(); it != widths.end(); ++it) {
|
int ch = it->first;
|
int w = it->second;
|
if (std::next(it) == widths.end()) {
|
// Only one char left, use format c [w]
|
auto oneW = pdfium::MakeUnique<CPDF_Array>();
|
oneW->AddNew<CPDF_Number>(w);
|
widthsArray->AddNew<CPDF_Number>(ch);
|
widthsArray->Add(std::move(oneW));
|
break;
|
}
|
++it;
|
int next_ch = it->first;
|
int next_w = it->second;
|
if (next_ch == ch + 1 && next_w == w) {
|
// The array can have a group c_first c_last w: all CIDs in the range from
|
// c_first to c_last will have width w
|
widthsArray->AddNew<CPDF_Number>(ch);
|
ch = next_ch;
|
while (true) {
|
auto next_it = std::next(it);
|
if (next_it == widths.end() || next_it->first != it->first + 1 ||
|
next_it->second != it->second) {
|
break;
|
}
|
++it;
|
ch = it->first;
|
}
|
widthsArray->AddNew<CPDF_Number>(ch);
|
widthsArray->AddNew<CPDF_Number>(w);
|
continue;
|
}
|
// Otherwise we can have a group of the form c [w1 w2 ...]: c has width
|
// w1, c+1 has width w2, etc.
|
widthsArray->AddNew<CPDF_Number>(ch);
|
auto curWidthArray = pdfium::MakeUnique<CPDF_Array>();
|
curWidthArray->AddNew<CPDF_Number>(w);
|
curWidthArray->AddNew<CPDF_Number>(next_w);
|
while (true) {
|
auto next_it = std::next(it);
|
if (next_it == widths.end() || next_it->first != it->first + 1)
|
break;
|
++it;
|
curWidthArray->AddNew<CPDF_Number>(static_cast<int>(it->second));
|
}
|
widthsArray->Add(std::move(curWidthArray));
|
}
|
pCIDFont->SetNewFor<CPDF_Reference>("W", pDoc, widthsArray->GetObjNum());
|
// TODO(npm): Support vertical writing
|
|
auto pDescendant = pdfium::MakeUnique<CPDF_Array>();
|
pDescendant->AddNew<CPDF_Reference>(pDoc, pCIDFont->GetObjNum());
|
fontDict->SetFor("DescendantFonts", std::move(pDescendant));
|
CPDF_Stream* toUnicodeStream = LoadUnicode(pDoc, to_unicode);
|
fontDict->SetNewFor<CPDF_Reference>("ToUnicode", pDoc,
|
toUnicodeStream->GetObjNum());
|
return pDoc->LoadFont(fontDict);
|
}
|
|
} // namespace
|
|
// Creates a text object that uses the stock font named |font| at |font_size|.
// Returns nullptr if |document| does not resolve to a document or the stock
// font lookup fails. The caller takes ownership of the returned object.
FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV
FPDFPageObj_NewTextObj(FPDF_DOCUMENT document,
                       FPDF_BYTESTRING font,
                       float font_size) {
  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
  // The font lookup is only attempted when the document handle is valid.
  CPDF_Font* pStockFont =
      pDoc ? CPDF_Font::GetStockFont(pDoc, ByteStringView(font)) : nullptr;
  if (!pStockFont)
    return nullptr;

  auto pNewText = pdfium::MakeUnique<CPDF_TextObject>();
  pNewText->m_TextState.SetFont(pStockFont);
  pNewText->m_TextState.SetFontSize(font_size);
  pNewText->DefaultStates();
  return pNewText.release();  // Caller takes ownership.
}
|
|
// Replaces the text of |text_object| with |text|, which is decoded as UTF-16LE.
// Each decoded character is converted to a charcode of the object's font and
// appended to the byte string that is then set on the object. Returns false
// only when |text_object| is null.
FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDFText_SetText(FPDF_PAGEOBJECT text_object, FPDF_WIDESTRING text) {
  if (!text_object)
    return false;
  auto* pTextObj = static_cast<CPDF_TextObject*>(text_object);

  const size_t text_length = WideString::WStringLength(text);
  const WideString decoded = WideString::FromUTF16LE(text, text_length);

  auto* pFont = pTextObj->GetFont();
  ByteString encoded;
  for (wchar_t unicode : decoded) {
    const auto charcode = pFont->CharCodeFromUnicode(unicode);
    pFont->AppendChar(&encoded, charcode);
  }
  pTextObj->SetText(encoded);
  return true;
}
|
|
// Loads the font stored in the |size| bytes at |data| into |document|.
//
// |font_type| must be FPDF_FONT_TYPE1 or FPDF_FONT_TRUETYPE. When |cid| is
// non-zero the font is added as a composite font, otherwise as a simple font.
// Returns nullptr if an argument is invalid, if the font data cannot be
// loaded, or if the selected loader fails.
FPDF_EXPORT FPDF_FONT FPDF_CALLCONV FPDFText_LoadFont(FPDF_DOCUMENT document,
                                                      const uint8_t* data,
                                                      uint32_t size,
                                                      int font_type,
                                                      FPDF_BOOL cid) {
  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
  const bool has_font_data = data && size != 0;
  const bool is_known_type =
      font_type == FPDF_FONT_TYPE1 || font_type == FPDF_FONT_TRUETYPE;
  if (!pDoc || !has_font_data || !is_known_type)
    return nullptr;

  // TODO(npm): Maybe use FT_Get_X11_Font_Format to check format? Otherwise, we
  // are allowing giving any font that can be loaded on freetype and setting it
  // as any font type.
  auto pLoadedFont = pdfium::MakeUnique<CFX_Font>();
  if (!pLoadedFont->LoadEmbedded(data, size))
    return nullptr;

  if (cid) {
    return LoadCompositeFont(pDoc, std::move(pLoadedFont), data, size,
                             font_type);
  }
  return LoadSimpleFont(pDoc, std::move(pLoadedFont), data, size, font_type);
}
|
|
// Sets the fill color of |text_object|. This is a plain forwarder: all five
// arguments are passed unchanged to FPDFPageObj_SetFillColor() and its result
// is returned.
FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDFText_SetFillColor(FPDF_PAGEOBJECT text_object,
                      unsigned int R,
                      unsigned int G,
                      unsigned int B,
                      unsigned int A) {
  const FPDF_BOOL color_was_set =
      FPDFPageObj_SetFillColor(text_object, R, G, B, A);
  return color_was_set;
}
|
|
// Releases |font| from the page data of the document it belongs to. Does
// nothing when |font| is null, when the font has no document, or when the
// page data reports IsForceClear().
FPDF_EXPORT void FPDF_CALLCONV FPDFFont_Close(FPDF_FONT font) {
  if (!font)
    return;

  auto* pFont = static_cast<CPDF_Font*>(font);
  CPDF_Document* pOwnerDoc = pFont->GetDocument();
  if (!pOwnerDoc)
    return;

  CPDF_DocPageData* pDocPageData = pOwnerDoc->GetPageData();
  if (pDocPageData->IsForceClear())
    return;

  pDocPageData->ReleaseFont(pFont->GetFontDict());
}
|
|
// Creates a text object in |document| that uses |font| at |font_size|. The
// font set on the object is the one pDoc->LoadFont() returns for the font
// dictionary of |font|. Returns nullptr if either handle is null; otherwise
// the released object pointer is returned.
FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV
FPDFPageObj_CreateTextObj(FPDF_DOCUMENT document,
                          FPDF_FONT font,
                          float font_size) {
  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
  auto* pSourceFont = static_cast<CPDF_Font*>(font);
  const bool handles_valid = pDoc && pSourceFont;
  if (!handles_valid)
    return nullptr;

  auto pNewText = pdfium::MakeUnique<CPDF_TextObject>();
  pNewText->m_TextState.SetFont(pDoc->LoadFont(pSourceFont->GetFontDict()));
  pNewText->m_TextState.SetFontSize(font_size);
  pNewText->DefaultStates();
  return pNewText.release();
}
|