// Copyright 2014 PDFium Authors. All rights reserved.
|
// Use of this source code is governed by a BSD-style license that can be
|
// found in the LICENSE file.
|
|
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
|
|
#include "public/fpdf_text.h"
|
|
#include <algorithm>
|
#include <vector>
|
|
#include "core/fpdfapi/page/cpdf_page.h"
|
#include "core/fpdfdoc/cpdf_viewerpreferences.h"
|
#include "core/fpdftext/cpdf_linkextract.h"
|
#include "core/fpdftext/cpdf_textpage.h"
|
#include "core/fpdftext/cpdf_textpagefind.h"
|
#include "fpdfsdk/fsdk_define.h"
|
#include "third_party/base/numerics/safe_conversions.h"
|
#include "third_party/base/stl_util.h"
|
|
#ifdef PDF_ENABLE_XFA
|
#include "fpdfsdk/fpdfxfa/cpdfxfa_context.h"
|
#include "fpdfsdk/fpdfxfa/cpdfxfa_page.h"
|
#endif // PDF_ENABLE_XFA
|
|
#ifdef _WIN32
|
#include <tchar.h>
|
#endif
|
|
namespace {
|
|
constexpr size_t kBytesPerCharacter = sizeof(unsigned short);
|
|
CPDF_TextPage* CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page) {
|
return static_cast<CPDF_TextPage*>(text_page);
|
}
|
|
CPDF_TextPageFind* CPDFTextPageFindFromFPDFSchHandle(FPDF_SCHHANDLE handle) {
|
return static_cast<CPDF_TextPageFind*>(handle);
|
}
|
|
CPDF_LinkExtract* CPDFLinkExtractFromFPDFPageLink(FPDF_PAGELINK link) {
|
return static_cast<CPDF_LinkExtract*>(link);
|
}
|
|
} // namespace
|
|
// Creates and parses a text page for |page|.
//
// Returns nullptr when |page| does not resolve to a CPDF_Page. Otherwise the
// returned handle wraps a heap-allocated CPDF_TextPage that is destroyed by
// FPDFText_ClosePage().
FPDF_EXPORT FPDF_TEXTPAGE FPDF_CALLCONV FPDFText_LoadPage(FPDF_PAGE page) {
  CPDF_Page* pdf_page = CPDFPageFromFPDFPage(page);
  if (!pdf_page)
    return nullptr;

#ifdef PDF_ENABLE_XFA
  // In XFA builds the viewer preferences come from the XFA context's document.
  CPDFXFA_Page* xfa_page = (CPDFXFA_Page*)page;
  CPDFXFA_Context* xfa_context = xfa_page->GetContext();
  CPDF_ViewerPreferences prefs(xfa_context->GetPDFDoc());
#else   // PDF_ENABLE_XFA
  CPDF_ViewerPreferences prefs(pdf_page->m_pDocument.Get());
#endif  // PDF_ENABLE_XFA

  // The reading direction is taken from the document's viewer preferences.
  const FPDFText_Direction direction = prefs.IsDirectionR2L()
                                           ? FPDFText_Direction::Right
                                           : FPDFText_Direction::Left;
  CPDF_TextPage* parsed_page = new CPDF_TextPage(pdf_page, direction);
  parsed_page->ParseTextPage();
  return parsed_page;
}
|
|
// Destroys the text page behind |text_page|.
FPDF_EXPORT void FPDF_CALLCONV FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
  CPDF_TextPage* doomed = CPDFTextPageFromFPDFTextPage(text_page);
  delete doomed;
}
|
|
// Returns the character count reported by the text page, or -1 when
// |text_page| is null.
FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
  return text_page ? CPDFTextPageFromFPDFTextPage(text_page)->CountChars()
                   : -1;
}
|
|
// Returns the Unicode value recorded for the character at |index|, or 0 when
// |text_page| is null or |index| is outside [0, CountChars()).
FPDF_EXPORT unsigned int FPDF_CALLCONV
FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index) {
  if (!text_page)
    return 0;

  CPDF_TextPage* const page_text = CPDFTextPageFromFPDFTextPage(text_page);
  const bool index_in_range = index >= 0 && index < page_text->CountChars();
  if (!index_in_range)
    return 0;

  FPDF_CHAR_INFO info;
  page_text->GetCharInfo(index, &info);
  return info.m_Unicode;
}
|
|
// Returns the font size recorded for the character at |index|, or 0 when
// |text_page| is null or |index| is outside [0, CountChars()).
FPDF_EXPORT double FPDF_CALLCONV FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
                                                      int index) {
  if (!text_page)
    return 0;

  CPDF_TextPage* const page_text = CPDFTextPageFromFPDFTextPage(text_page);
  const bool index_in_range = index >= 0 && index < page_text->CountChars();
  if (!index_in_range)
    return 0;

  FPDF_CHAR_INFO info;
  page_text->GetCharInfo(index, &info);
  return info.m_FontSize;
}
|
|
// Copies the bounding box of the character at |index| into |left|, |right|,
// |bottom| and |top|.
//
// Returns false, leaving the outputs untouched, when |text_page| is null, any
// output pointer is null, or |index| is outside [0, CountChars()). Returns
// true once all four outputs have been written.
FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
                                                        int index,
                                                        double* left,
                                                        double* right,
                                                        double* bottom,
                                                        double* top) {
  // Reject null outputs up front so a caller error cannot turn into a null
  // dereference below.
  if (!left || !right || !bottom || !top)
    return false;

  if (!text_page || index < 0)
    return false;

  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
  if (index >= textpage->CountChars())
    return false;

  FPDF_CHAR_INFO charinfo;
  textpage->GetCharInfo(index, &charinfo);
  *left = charinfo.m_CharBox.left;
  *right = charinfo.m_CharBox.right;
  *bottom = charinfo.m_CharBox.bottom;
  *top = charinfo.m_CharBox.top;
  return true;
}
|
|
// Copies the origin of the character at |index| into |x| and |y|.
//
// Returns false, leaving the outputs untouched, when |text_page| is null,
// either output pointer is null, or |index| is outside [0, CountChars()).
// Returns true once both outputs have been written.
FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDFText_GetCharOrigin(FPDF_TEXTPAGE text_page,
                       int index,
                       double* x,
                       double* y) {
  // Reject null outputs up front so a caller error cannot turn into a null
  // dereference below.
  if (!x || !y)
    return false;

  if (!text_page)
    return false;

  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
  if (index < 0 || index >= textpage->CountChars())
    return false;

  FPDF_CHAR_INFO charinfo;
  textpage->GetCharInfo(index, &charinfo);
  *x = charinfo.m_Origin.x;
  *y = charinfo.m_Origin.y;
  return true;
}
|
|
// select
|
// Forwards a position lookup to the text page's GetIndexAtPos(), narrowing the
// coordinates and tolerances to float. Returns -3 when |text_page| is null;
// otherwise returns whatever GetIndexAtPos() reports.
FPDF_EXPORT int FPDF_CALLCONV
FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
                           double x,
                           double y,
                           double xTolerance,
                           double yTolerance) {
  if (!text_page)
    return -3;

  const CFX_PointF position(static_cast<float>(x), static_cast<float>(y));
  const CFX_SizeF tolerance(static_cast<float>(xTolerance),
                            static_cast<float>(yTolerance));
  return CPDFTextPageFromFPDFTextPage(text_page)->GetIndexAtPos(position,
                                                                tolerance);
}
|
|
// Copies up to |char_count| characters of page text, starting at
// |char_start|, into |result| as UTF-16LE.
//
// Returns 0 when |page| or |result| is null, when |char_start| or
// |char_count| is negative, or when |char_start| is at or past the end of the
// page. Otherwise returns the number of 16-bit units written to |result|.
FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE page,
                                               int char_start,
                                               int char_count,
                                               unsigned short* result) {
  if (!page || !result || char_start < 0 || char_count < 0)
    return 0;

  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(page);
  const int chars_remaining = textpage->CountChars() - char_start;
  if (chars_remaining <= 0)
    return 0;

  // Never read past the end of the page.
  const int chars_wanted = std::min(char_count, chars_remaining);
  if (chars_wanted == 0) {
    // Writing out "", which has a character count of 1 due to the NUL.
    *result = '\0';
    return 1;
  }

  WideString text = textpage->GetPageText(char_start, chars_wanted);
  const size_t max_length = static_cast<size_t>(chars_wanted);
  if (text.GetLength() > max_length)
    text = text.Left(max_length);

  // UTF16LE_Encode doesn't handle surrogate pairs properly, so it is expected
  // the number of items to stay the same.
  ByteString encoded = text.UTF16LE_Encode();
  const size_t encoded_size = encoded.GetLength();
  const int ret_count = encoded_size / kBytesPerCharacter;

  // +1 to account for the NUL terminator.
  ASSERT(ret_count <= chars_wanted + 1);
  memcpy(result, encoded.GetBuffer(encoded_size), encoded_size);
  return ret_count;
}
|
|
// Forwards |start| and |count| to the text page's CountRects() and returns its
// result, or 0 when |text_page| is null.
FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountRects(FPDF_TEXTPAGE text_page,
                                                  int start,
                                                  int count) {
  return text_page
             ? CPDFTextPageFromFPDFTextPage(text_page)->CountRects(start, count)
             : 0;
}
|
|
// Copies the rectangle at |rect_index| into |left|, |top|, |right| and
// |bottom|.
//
// Returns false, leaving the outputs untouched, when |text_page| or any output
// pointer is null. Otherwise the outputs are always written from the local
// rectangle passed to the text page's GetRect(), and the return value is
// whatever GetRect() reported.
FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetRect(FPDF_TEXTPAGE text_page,
                                                     int rect_index,
                                                     double* left,
                                                     double* top,
                                                     double* right,
                                                     double* bottom) {
  if (!text_page)
    return false;

  // Reject null outputs so a caller error cannot turn into a null dereference
  // below.
  if (!left || !top || !right || !bottom)
    return false;

  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
  CFX_FloatRect rect;
  bool result = textpage->GetRect(rect_index, &rect);

  *left = rect.left;
  *top = rect.top;
  *right = rect.right;
  *bottom = rect.bottom;
  return result;
}
|
|
// Extracts the text inside the given rectangle and copies it to |buffer| as
// UTF-16LE.
//
// Returns 0 when |text_page| is null. When |buffer| is null or |buflen| is not
// positive, returns the length of the extracted wide string without copying.
// Otherwise copies at most |buflen| 16-bit units and returns the number
// copied.
FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
                                                      double left,
                                                      double top,
                                                      double right,
                                                      double bottom,
                                                      unsigned short* buffer,
                                                      int buflen) {
  if (!text_page)
    return 0;

  // CFX_FloatRect takes its arguments in (left, bottom, right, top) order.
  const CFX_FloatRect bounds(static_cast<float>(left),
                             static_cast<float>(bottom),
                             static_cast<float>(right),
                             static_cast<float>(top));
  WideString text =
      CPDFTextPageFromFPDFTextPage(text_page)->GetTextByRect(bounds);

  if (!buffer || buflen <= 0)
    return text.GetLength();

  ByteString encoded = text.UTF16LE_Encode();
  const int available_units = encoded.GetLength() / sizeof(unsigned short);
  const int copied_units = std::min(available_units, buflen);
  const size_t copied_bytes = copied_units * sizeof(unsigned short);
  memcpy(buffer, encoded.GetBuffer(copied_bytes), copied_bytes);
  encoded.ReleaseBuffer(copied_bytes);

  return copied_units;
}
|
|
// Search
|
// -1 for end
|
// Creates a search context over |text_page| and starts a search for
// |findwhat| with the given |flags|.
//
// Returns nullptr when |text_page| is null. A negative |start_index| is passed
// to FindFirst() as an empty Optional; any other value is passed through as
// the start position. The returned handle is destroyed by
// FPDFText_FindClose().
FPDF_EXPORT FPDF_SCHHANDLE FPDF_CALLCONV
FPDFText_FindStart(FPDF_TEXTPAGE text_page,
                   FPDF_WIDESTRING findwhat,
                   unsigned long flags,
                   int start_index) {
  if (!text_page)
    return nullptr;

  CPDF_TextPageFind* finder =
      new CPDF_TextPageFind(CPDFTextPageFromFPDFTextPage(text_page));

  const size_t pattern_length = WideString::WStringLength(findwhat);
  const WideString pattern = WideString::FromUTF16LE(findwhat, pattern_length);
  const Optional<size_t> start_position =
      start_index >= 0 ? Optional<size_t>(start_index) : Optional<size_t>();

  finder->FindFirst(pattern, flags, start_position);
  return finder;
}
|
|
// Returns the result of the search context's FindNext(), or false when
// |handle| is null.
FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_FindNext(FPDF_SCHHANDLE handle) {
  return handle && CPDFTextPageFindFromFPDFSchHandle(handle)->FindNext();
}
|
|
// Returns the result of the search context's FindPrev(), or false when
// |handle| is null.
FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
  return handle && CPDFTextPageFindFromFPDFSchHandle(handle)->FindPrev();
}
|
|
// Returns the value of the search context's GetCurOrder(), or 0 when |handle|
// is null.
FPDF_EXPORT int FPDF_CALLCONV
FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
  if (!handle)
    return 0;

  CPDF_TextPageFind* const finder = CPDFTextPageFindFromFPDFSchHandle(handle);
  const int current_order = finder->GetCurOrder();
  return current_order;
}
|
|
// Returns the value of the search context's GetMatchedCount(), or 0 when
// |handle| is null.
FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
  if (!handle)
    return 0;

  CPDF_TextPageFind* const finder = CPDFTextPageFindFromFPDFSchHandle(handle);
  const int matched_count = finder->GetMatchedCount();
  return matched_count;
}
|
|
// Destroys the search context behind |handle|. A null |handle| is a no-op.
//
// |handle| is passed by value, so the caller's copy is not modified by this
// call.
FPDF_EXPORT void FPDF_CALLCONV FPDFText_FindClose(FPDF_SCHHANDLE handle) {
  // Deleting a null pointer does nothing, so no explicit null guard is needed.
  delete CPDFTextPageFindFromFPDFSchHandle(handle);
}
|
|
// web link
|
// Creates a link extractor over |text_page| and runs link extraction on it.
//
// Returns nullptr when |text_page| is null. The returned handle is destroyed
// by FPDFLink_CloseWebLinks().
FPDF_EXPORT FPDF_PAGELINK FPDF_CALLCONV
FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
  if (!text_page)
    return nullptr;

  CPDF_TextPage* const source_page = CPDFTextPageFromFPDFTextPage(text_page);
  CPDF_LinkExtract* extractor = new CPDF_LinkExtract(source_page);
  extractor->ExtractLinks();
  return extractor;
}
|
|
// Returns the extractor's link count converted to int via checked_cast, or 0
// when |link_page| is null.
FPDF_EXPORT int FPDF_CALLCONV FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
  if (!link_page)
    return 0;

  CPDF_LinkExtract* const extractor =
      CPDFLinkExtractFromFPDFPageLink(link_page);
  const auto link_count = extractor->CountLinks();
  return pdfium::base::checked_cast<int>(link_count);
}
|
|
// Copies the URL of the link at |link_index| into |buffer| as UTF-16LE.
//
// An empty URL is used when |link_page| is null or |link_index| is negative.
// When |buffer| is null or |buflen| is not positive, nothing is copied and the
// number of 16-bit units in the encoded URL is returned. Otherwise at most
// |buflen| units are copied and the number copied is returned.
FPDF_EXPORT int FPDF_CALLCONV FPDFLink_GetURL(FPDF_PAGELINK link_page,
                                              int link_index,
                                              unsigned short* buffer,
                                              int buflen) {
  WideString url(L"");
  const bool can_query = link_page && link_index >= 0;
  if (can_query)
    url = CPDFLinkExtractFromFPDFPageLink(link_page)->GetURL(link_index);

  ByteString encoded = url.UTF16LE_Encode();
  const int units_required = encoded.GetLength() / sizeof(unsigned short);
  if (!buffer || buflen <= 0)
    return units_required;

  const int units_copied = std::min(units_required, buflen);
  if (units_copied <= 0)
    return units_copied;

  const int byte_count = units_copied * sizeof(unsigned short);
  memcpy(buffer, encoded.GetBuffer(byte_count), byte_count);
  return units_copied;
}
|
|
// Returns the number of rectangles the extractor reports for the link at
// |link_index|, or 0 when |link_page| is null or |link_index| is negative.
FPDF_EXPORT int FPDF_CALLCONV FPDFLink_CountRects(FPDF_PAGELINK link_page,
                                                  int link_index) {
  const bool can_query = link_page && link_index >= 0;
  if (!can_query)
    return 0;

  CPDF_LinkExtract* const extractor =
      CPDFLinkExtractFromFPDFPageLink(link_page);
  return pdfium::CollectionSize<int>(extractor->GetRects(link_index));
}
|
|
// Copies rectangle |rect_index| of the link at |link_index| into |left|,
// |top|, |right| and |bottom|.
//
// Returns false, leaving the outputs untouched, when |link_page| or any output
// pointer is null, when either index is negative, or when |rect_index| is not
// below the number of rectangles reported for the link. Returns true once all
// four outputs have been written.
FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFLink_GetRect(FPDF_PAGELINK link_page,
                                                     int link_index,
                                                     int rect_index,
                                                     double* left,
                                                     double* top,
                                                     double* right,
                                                     double* bottom) {
  if (!link_page || link_index < 0 || rect_index < 0)
    return false;

  // Reject null outputs so a caller error cannot turn into a null dereference
  // below.
  if (!left || !top || !right || !bottom)
    return false;

  CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
  std::vector<CFX_FloatRect> rectArray = pageLink->GetRects(link_index);
  if (rect_index >= pdfium::CollectionSize<int>(rectArray))
    return false;

  const CFX_FloatRect& rect = rectArray[rect_index];
  *left = rect.left;
  *right = rect.right;
  *top = rect.top;
  *bottom = rect.bottom;
  return true;
}
|
|
// Destroys the link extractor behind |link_page|.
FPDF_EXPORT void FPDF_CALLCONV FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
  CPDF_LinkExtract* doomed = CPDFLinkExtractFromFPDFPageLink(link_page);
  delete doomed;
}
|