Working with PDF documents often requires extracting data from tables and exporting it to text files for further analysis or processing. In this article, we will look at how to use C# and .NET to perform these tasks using the SautinSoft PDF.Net library. Extracting data from tables in PDF documents can be useful for analyzing data, converting it to other formats, or for further processing.
After extracting data from tables, it may be necessary to export it to a text file for further analysis or processing. Let's look at example code that demonstrates how to find and extract tables from a PDF document, as well as how to export the table data to a text file:
Complete code
using System;
using System.Globalization;
using System.IO;
using System.Reflection;
using System.Text.Json;
using SautinSoft;
using SautinSoft.Pdf;
using SautinSoft.Pdf.Content;
namespace Sample
{
    class Sample
    {
        /// <summary>
        /// Finds the tables on the first page of a PDF document and exports
        /// their text to "Output.txt" as a two-column, pipe-separated listing.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/pdf/help/net/developer-guide/export-data-from-table-to-txt.php
        /// <para>
        /// The sample reads only page index 0 and only the first two cells of
        /// every row, so it assumes the document has at least one page and each
        /// table row has at least two cells.
        /// </para>
        /// </remarks>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Before starting this example, please get a free 100-day trial key:
            // https://sautinsoft.com/start-for-free/
            // Apply the key here:
            // PdfDocument.SetLicense("...");
            string pdfFile = Path.GetFullPath(@"..\..\..\tables.pdf");
            const string outputFile = "Output.txt";

            // The text is collected in memory first so the PDF can be released
            // before the output file is opened.
            using (var writer = new StringWriter(CultureInfo.InvariantCulture))
            {
                using (var document = PdfDocument.Load(pdfFile))
                {
                    // Find Tables.
                    var tables = document.Pages[0].Content.FindTables();

                    // Two left-aligned, 20-character wide columns separated by '|'.
                    string format = "{0,-20}|{1,-20}", separator = new string('-', 40);

                    // Get text from tables.
                    foreach (var table in tables)
                    {
                        foreach (var row in table.Rows)
                        {
                            writer.WriteLine(format, row.Cells[0].ToString(), row.Cells[1].ToString());
                            writer.WriteLine(separator);
                        }

                        // Blank line between consecutive tables.
                        writer.WriteLine();
                    }
                }

                // 'using' guarantees the file handle is released even if the
                // write throws; the original closed the streams manually and
                // leaked them on failure.
                using (var file = new FileStream(outputFile, FileMode.Create))
                using (var streamWriter = new StreamWriter(file))
                {
                    streamWriter.WriteLine(writer.ToString());
                }
            }

            // Hand the result file to the operating system shell to open it.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outputFile) { UseShellExecute = true });
        }
    }
}
Option Infer On
Imports System
Imports System.Globalization
Imports System.IO
Imports System.Reflection.Metadata
Imports System.Text.Json
Imports SautinSoft
Imports SautinSoft.Pdf
Imports SautinSoft.Pdf.Content
Namespace Sample
    Friend Class Sample
        ''' <summary>
        ''' Finds the tables on the first page of a PDF document and exports
        ''' their text to "Output.txt" as a two-column, pipe-separated listing.
        ''' </summary>
        ''' <remarks>
        ''' Details: https://sautinsoft.com/products/pdf/help/net/developer-guide/export-data-from-table-to-txt.php
        ''' <para>
        ''' The sample reads only page index 0 and only the first two cells of
        ''' every row, so it assumes the document has at least one page and each
        ''' table row has at least two cells.
        ''' </para>
        ''' </remarks>
        ''' <param name="args">Command-line arguments (not used).</param>
        Shared Sub Main(ByVal args() As String)
            ' Before starting this example, please get a free 100-day trial key:
            ' https://sautinsoft.com/start-for-free/
            ' Apply the key here:
            ' PdfDocument.SetLicense("...")
            Dim pdfFile As String = Path.GetFullPath("..\..\..\tables.pdf")
            Const outputFile As String = "Output.txt"

            ' The text is collected in memory first so the PDF can be released
            ' before the output file is opened.
            Using writer = New StringWriter(CultureInfo.InvariantCulture)
                Using document = PdfDocument.Load(pdfFile)
                    ' Find Tables.
                    Dim tables = document.Pages(0).Content.FindTables()

                    ' Two left-aligned, 20-character wide columns separated by "|".
                    Dim format As String = "{0,-20}|{1,-20}", separator As New String("-"c, 40)

                    ' Get text from tables.
                    For Each table In tables
                        For Each row In table.Rows
                            writer.WriteLine(format, row.Cells(0).ToString(), row.Cells(1).ToString())
                            writer.WriteLine(separator)
                        Next row

                        ' Blank line between consecutive tables.
                        writer.WriteLine()
                    Next table
                End Using

                ' Using guarantees the file handle is released even if the
                ' write throws; the original closed the streams manually and
                ' leaked them on failure.
                Using file As New FileStream(outputFile, FileMode.Create), streamWriter As New StreamWriter(file)
                    streamWriter.WriteLine(writer.ToString())
                End Using
            End Using

            ' Hand the result file to the operating system shell to open it.
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(outputFile) With {.UseShellExecute = True})
        End Sub
    End Class
End Namespace
If you need a code sample or have a question, write to us at support@sautinsoft.ru, ask in the online chat (bottom right corner of this page), or use the form below: