Working with PDF documents is a common task in modern programming. It is often necessary to extract text, or to obtain information about the text's size and boundaries, for subsequent processing or analysis. Reading this additional information about the text in a PDF document can be done with the SautinSoft.PDF library.
In this article, we will look at how to use this SDK to get text borders and sizes from a PDF document using the C# programming language and the .NET platform.
Step-by-step guide:
Input file: Reading additional information about a text.pdf
Complete code
using System;
using System.IO;
using SautinSoft;
using SautinSoft.Pdf;
using SautinSoft.Pdf.Content;
class Program
{
    /// <summary>
    /// Loads a PDF document, walks every content element of every page and,
    /// for each text element, prints its text, font details, fill color and
    /// transformed bounds to the console.
    /// </summary>
    /// <remarks>
    /// Details: https://sautinsoft.com/products/pdf/help/net/developer-guide/reading-additional-information.php
    /// </remarks>
    static void Main()
    {
        // Before starting this example, please get a free 100-day trial key:
        // https://sautinsoft.com/start-for-free/
        // Apply the key here:
        // PdfDocument.SetLicense("...");

        // Build the relative path from segments so the directory separator is
        // correct on every OS (a literal backslash is not a separator on Linux/macOS).
        string pdfFile = Path.GetFullPath(Path.Combine("..", "..", "..", "table.pdf"));

        // Iterate through all PDF pages and through each page's content elements,
        // and retrieve only the text content elements.
        using (var document = PdfDocument.Load(pdfFile))
        {
            foreach (var page in document.Pages)
            {
                // The enumerator is driven by hand (instead of foreach) because its
                // Transform property is read for every element below.
                var contentEnumerator = page.Content.Elements.All(page.Transform).GetEnumerator();
                while (contentEnumerator.MoveNext())
                {
                    // Skip everything that is not a text element.
                    if (contentEnumerator.Current.ElementType != PdfContentElementType.Text)
                    {
                        continue;
                    }

                    var textElement = (PdfTextContent)contentEnumerator.Current;
                    var text = textElement.ToString();
                    var font = textElement.Format.Text.Font;
                    var color = textElement.Format.Fill.Color;
                    var bounds = textElement.Bounds;

                    // Apply the enumerator's current transform to the bounds.
                    // 'bounds' must be passed by reference: it is a local copy, and the
                    // values printed below are only the transformed ones if the call
                    // updates this variable in place.
                    contentEnumerator.Transform.Transform(ref bounds);

                    // Read the text content element's additional information.
                    Console.WriteLine($"Unicode text: {text}");
                    Console.WriteLine($"Font name: {font.Face.Family.Name}");
                    Console.WriteLine($"Font size: {font.Size}");
                    Console.WriteLine($"Font style: {font.Face.Style}");
                    Console.WriteLine($"Font weight: {font.Face.Weight}");

                    // The color line is printed only when the color can be expressed as RGB.
                    if (color.TryGetRgb(out double red, out double green, out double blue))
                    {
                        Console.WriteLine($"Color: Red={red}, Green={green}, Blue={blue}");
                    }

                    Console.WriteLine($"Bounds: Left={bounds.Left:0.00}, Bottom={bounds.Bottom:0.00}, Right={bounds.Right:0.00}, Top={bounds.Top:0.00}");
                    Console.WriteLine();
                }
            }
        }
    }
}
Option Infer On
Imports System
Imports System.IO
Imports SautinSoft
Imports SautinSoft.Pdf
Imports SautinSoft.Pdf.Content
Friend Class Program
    ''' <summary>
    ''' Loads a PDF document, walks every content element of every page and,
    ''' for each text element, prints its text, font details, fill color and
    ''' transformed bounds to the console.
    ''' </summary>
    ''' <remarks>
    ''' Details: https://sautinsoft.com/products/pdf/help/net/developer-guide/reading-additional-information.php
    ''' </remarks>
    Shared Sub Main()
        ' Before starting this example, please get a free license:
        ' https://sautinsoft.com/start-for-free/
        ' Apply the key here:
        ' PdfDocument.SetLicense("...")

        ' Build the relative path from segments so the directory separator is
        ' correct on every OS (a literal backslash is not a separator on Linux/macOS).
        Dim pdfFile As String = Path.GetFullPath(Path.Combine("..", "..", "..", "table.pdf"))

        ' Iterate through all PDF pages and through each page's content elements,
        ' and retrieve only the text content elements.
        Using document = PdfDocument.Load(pdfFile)
            For Each page In document.Pages
                ' The enumerator is driven by hand (instead of For Each) because its
                ' Transform property is read for every element below.
                Dim contentEnumerator = page.Content.Elements.All(page.Transform).GetEnumerator()
                Do While contentEnumerator.MoveNext()
                    ' Skip everything that is not a text element.
                    If contentEnumerator.Current.ElementType <> PdfContentElementType.Text Then
                        Continue Do
                    End If

                    Dim textElement = CType(contentEnumerator.Current, PdfTextContent)
                    Dim text = textElement.ToString()
                    Dim font = textElement.Format.Text.Font
                    Dim color = textElement.Format.Fill.Color
                    Dim bounds = textElement.Bounds

                    ' Apply the enumerator's current transform to the bounds.
                    ' NOTE(review): relies on the parameter being ByRef so that 'bounds'
                    ' is updated in place - confirm against the library reference.
                    contentEnumerator.Transform.Transform(bounds)

                    ' Read the text content element's additional information.
                    Console.WriteLine($"Unicode text: {text}")
                    Console.WriteLine($"Font name: {font.Face.Family.Name}")
                    Console.WriteLine($"Font size: {font.Size}")
                    Console.WriteLine($"Font style: {font.Face.Style}")
                    Console.WriteLine($"Font weight: {font.Face.Weight}")

                    ' The color line is printed only when the color can be expressed as RGB.
                    Dim red As Double
                    Dim green As Double
                    Dim blue As Double
                    If color.TryGetRgb(red, green, blue) Then
                        Console.WriteLine($"Color: Red={red}, Green={green}, Blue={blue}")
                    End If

                    Console.WriteLine($"Bounds: Left={bounds.Left:0.00}, Bottom={bounds.Bottom:0.00}, Right={bounds.Right:0.00}, Top={bounds.Top:0.00}")
                    Console.WriteLine()
                Loop
            Next page
        End Using
    End Sub
End Class
If you need a code example or have a question, email us at support@sautinsoft.ru, ask in the online chat (bottom-right corner of this page), or use the form below: