Полный код
using System;
using System.IO;
using System.Linq;
using SautinSoft.Document;
using SautinSoft.Document.Drawing;
using SautinSoft.Document.Tables;
namespace Example
{
class Program
{
static void Main(string[] args)
{
// Get your free 100-day key here:
// https://sautinsoft.com/start-for-free/
CaptureTextZoneByXY();
}
/// <summary>
/// How to capture text and images from the existing PDF, DOCX, any document by specific (x,y) coordinates
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/capture-text-images-in-pdf-docx-document-by-specific-zone-x-y-coordinates-net-csharp-vb.php
/// </remarks>
static void CaptureTextZoneByXY()
{
// Let us say, we want to capture text and graphics into:
// Left-Top:(0, 50) mm,
var mmXY = (0f, 50f);
// Width: 250 mm, Height: 150 mm.
var mmWH = (250f, 150f);
// Zero-page index, e.g. page 1 has index 0.
int[] pageCollection = new int[1] { 0 };
// Convert mm to points
double leftX = LengthUnitConverter.Convert(mmXY.Item1, LengthUnit.Millimeter, LengthUnit.Point);
double topY = LengthUnitConverter.Convert(mmXY.Item2, LengthUnit.Millimeter, LengthUnit.Point);
double width = LengthUnitConverter.Convert(mmWH.Item1, LengthUnit.Millimeter, LengthUnit.Point);
double height = LengthUnitConverter.Convert(mmWH.Item2, LengthUnit.Millimeter, LengthUnit.Point);
string inpFile = Path.GetFullPath(@"..\..\..\Potato Beetle.pdf");
string outFile = "Result.docx";
// 1. Load an existing document, load only specigic pages.
PdfLoadOptions opt = new PdfLoadOptions()
{
SelectedPages = pageCollection,
DetectTables = true,
ConversionMode = PdfConversionMode.Flowing,
};
DocumentCore dc = DocumentCore.Load(inpFile, opt);
// 2. Create new document to store captured data.
DocumentCore dcCaptured = new DocumentCore();
// Create import session.
ImportSession session = new ImportSession(dc, dcCaptured, StyleImportingMode.KeepSourceFormatting);
// 3. Iterate through document pages
// and capture elements by (X,Y) and (width, height).
var paginator = dc.GetPaginator(new PaginatorOptions() { UpdateFields = true });
int pageIndex = 0;
foreach (var page in paginator.Pages)
{
Section importedSection = null;
foreach (var elementFrame in page.GetElementFrames(
ElementType.Paragraph,
ElementType.Table
))
{
// Is element inside capturing zone?
if (elementFrame.Bounds.Left >= leftX &&
elementFrame.Bounds.Left <= leftX + width &&
elementFrame.Bounds.Top >= topY &&
elementFrame.Bounds.Top <= topY + height)
{
if (importedSection == null)
{
importedSection = dcCaptured.Import<Section>(dc.Sections[pageIndex], false, session);
dcCaptured.Sections.Add(importedSection);
}
if (elementFrame.Element is Paragraph par)
{
var importedPar = dcCaptured.Import<Paragraph>(par, true, session);
importedSection.Blocks.Add(importedPar);
}
else if (elementFrame.Element is Table table)
{
var importedTable = dcCaptured.Import<Table>(table, true, session);
importedSection.Blocks.Add(importedTable);
}
}
}
pageIndex++;
}
dcCaptured.Save(outFile);
System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true });
}
}
}
Imports System
Imports System.IO
Imports System.Linq
Imports SautinSoft.Document
Imports SautinSoft.Document.Drawing
Imports SautinSoft.Document.Tables
Namespace Example
Friend Class Program
Shared Sub Main(ByVal args() As String)
' Get your free 100-day key here:
' https://sautinsoft.com/start-for-free/
CaptureTextZoneByXY()
End Sub
''' <summary>
''' How to capture text and images from the existing PDF, DOCX, any document by specific (x,y) coordinates
''' </summary>
''' <remarks>
''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/capture-text-images-in-pdf-docx-document-by-specific-zone-x-y-coordinates-net-csharp-vb.php
''' </remarks>
Private Shared Sub CaptureTextZoneByXY()
' Let us say, we want to capture text and graphics into:
' Left-Top:(0, 50) mm,
Dim mmXY = (0F, 50.0F)
' Width: 250 mm, Height: 150 mm.
Dim mmWH = (250.0F, 150.0F)
' Zero-page index, e.g. page 1 has index 0.
Dim pageCollection() As Integer = {0}
' Convert mm to points
Dim leftX As Double = LengthUnitConverter.Convert(mmXY.Item1, LengthUnit.Millimeter, LengthUnit.Point)
Dim topY As Double = LengthUnitConverter.Convert(mmXY.Item2, LengthUnit.Millimeter, LengthUnit.Point)
Dim width As Double = LengthUnitConverter.Convert(mmWH.Item1, LengthUnit.Millimeter, LengthUnit.Point)
Dim height As Double = LengthUnitConverter.Convert(mmWH.Item2, LengthUnit.Millimeter, LengthUnit.Point)
Dim inpFile As String = Path.GetFullPath("..\..\..\Potato Beetle.pdf")
Dim outFile As String = "Result.docx"
' 1. Load an existing document, load only specigic pages.
Dim opt As New PdfLoadOptions() With {
.SelectedPages = pageCollection,
.DetectTables = True,
.ConversionMode = PdfConversionMode.Flowing
}
Dim dc As DocumentCore = DocumentCore.Load(inpFile, opt)
' 2. Create new document to store captured data.
Dim dcCaptured As New DocumentCore()
' Create import session.
Dim session As New ImportSession(dc, dcCaptured, StyleImportingMode.KeepSourceFormatting)
' 3. Iterate through document pages
' and capture elements by (X,Y) and (width, height).
Dim paginator = dc.GetPaginator(New PaginatorOptions() With {.UpdateFields = True})
Dim pageIndex As Integer = 0
For Each page In paginator.Pages
Dim importedSection As Section = Nothing
For Each elementFrame In page.GetElementFrames(ElementType.Paragraph, ElementType.Table)
' Is element inside capturing zone?
If elementFrame.Bounds.Left >= leftX AndAlso elementFrame.Bounds.Left <= leftX + width AndAlso elementFrame.Bounds.Top >= topY AndAlso elementFrame.Bounds.Top <= topY + height Then
If importedSection Is Nothing Then
importedSection = dcCaptured.Import(Of Section)(dc.Sections(pageIndex), False, session)
dcCaptured.Sections.Add(importedSection)
End If
Dim tempVar As Boolean = TypeOf elementFrame.Element Is Paragraph
Dim par As Paragraph = If(tempVar, CType(elementFrame.Element, Paragraph), Nothing)
If tempVar Then
Dim importedPar = dcCaptured.Import(Of Paragraph)(par, True, session)
importedSection.Blocks.Add(importedPar)
Else
Dim tempVar2 As Boolean = TypeOf elementFrame.Element Is Table
Dim table As Table = If(tempVar2, CType(elementFrame.Element, Table), Nothing)
If tempVar2 Then
Dim importedTable = dcCaptured.Import(Of Table)(table, True, session)
importedSection.Blocks.Add(importedTable)
End If
End If
End If
Next elementFrame
pageIndex += 1
Next page
dcCaptured.Save(outFile)
System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(outFile) With {.UseShellExecute = True})
End Sub
End Class
End Namespace
Если вам нужен пример кода или у вас есть вопрос: напишите нам по адресу support@sautinsoft.ru или спросите в онлайн-чате (правый нижний угол этой страницы) или используйте форму ниже: