Бывают случаи, когда по ключевым словам нужно найти абзацы, в которых эти слова встречаются. Такие текстовые данные могут храниться в форматах PDF, DOCX или RTF.
В этом примере кода мы выведем на консоль все абзацы, содержащие "company" как отдельное слово (без учёта регистра).
Полный код
using System;
using System.IO;
using SautinSoft.Document;
using SautinSoft.Document.Drawing;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace Example
{
    class Program
    {
        /// <summary>
        /// Entry point: runs the paragraph search demo.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            FindWordInParagraph();
        }

        /// <summary>
        /// Searches every PDF file under the <c>..\..\files\</c> folder (subfolders included) for the
        /// whole word "company", ignoring case. For each match it prints the file name, the text of the
        /// paragraph that contains the match, and the bounds of the matching element frame, then waits
        /// for a key press.
        /// You may change the extension: pdf, docx, rtf.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-show-paragraph-containing-required-word-in-csharp-vb-net.php
        /// </remarks>
        static void FindWordInParagraph()
        {
            // \b...\b restricts the match to "company" as a whole word; IgnoreCase also accepts "Company", "COMPANY", etc.
            Regex regex = new Regex(@"\bcompany\b", RegexOptions.IgnoreCase);

            // Loop through all PDF files in the directory and its subdirectories.
            foreach (string file in Directory.EnumerateFiles(@"..\..\files\", "*.pdf", SearchOption.AllDirectories))
            {
                DocumentCore dc = DocumentCore.Load(file);

                // Provides a functionality to paginate the document content.
                DocumentPaginator dp = dc.GetPaginator();

                foreach (ContentRange content in dc.Content.Find(regex))
                {
                    // The frame whose content starts exactly where the match starts; null when there is no such frame.
                    ElementFrame ef = dp.GetElementFrames().FirstOrDefault(e => content.Start.Equals(e.Content.Start));

                    // The match is expected to sit two levels below its paragraph.
                    // The "as" cast yields null when that assumption does not hold, so it must be checked.
                    Paragraph paragraph = content.Start.Parent.Parent as Paragraph;

                    // Prefer the whole paragraph text; fall back to the matched text itself
                    // when the containing paragraph could not be determined.
                    string text = paragraph != null
                        ? paragraph.Content.ToString().Trim()
                        : content.ToString().Trim();
                    Console.WriteLine("Filename: " + file + "\r\n" + text);

                    // The coordinates of the found word (only when a matching frame exists).
                    if (ef != null)
                    {
                        Console.WriteLine("Info:" + ef.Bounds.ToString());
                    }
                    else
                    {
                        Console.WriteLine("Info: element frame not found");
                    }

                    Console.WriteLine("Next paragraph?");
                    Console.ReadKey();
                }
            }
        }
    }
}
Imports Microsoft.VisualBasic
Imports System
Imports System.IO
Imports SautinSoft.Document
Imports SautinSoft.Document.Drawing
Imports System.Collections.Generic
Imports System.Linq
Imports System.Text
Imports System.Text.RegularExpressions
Namespace Example
    Friend Class Program
        ''' <summary>
        ''' Entry point: runs the paragraph search demo.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Shared Sub Main(ByVal args() As String)
            FindWordInParagraph()
        End Sub

        ''' <summary>
        ''' Searches every PDF file under the "..\files\" folder (subfolders included) for the
        ''' whole word "company", ignoring case. For each match it prints the file name, the text of the
        ''' paragraph that contains the match, and the bounds of the matching element frame, then waits
        ''' for a key press.
        ''' You may change the extension: pdf, docx, rtf.
        ''' </summary>
        ''' <remarks>
        ''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-show-paragraph-containing-required-word-in-csharp-vb-net.php
        ''' </remarks>
        Private Shared Sub FindWordInParagraph()
            ' \b...\b restricts the match to "company" as a whole word; IgnoreCase also accepts "Company", "COMPANY", etc.
            Dim regex As New Regex("\bcompany\b", RegexOptions.IgnoreCase)

            ' Loop through all PDF files in the directory and its subdirectories.
            For Each file As String In Directory.EnumerateFiles("..\files\", "*.pdf", SearchOption.AllDirectories)
                Dim dc As DocumentCore = DocumentCore.Load(file)

                ' Provides a functionality to paginate the document content.
                Dim dp As DocumentPaginator = dc.GetPaginator()

                For Each content As ContentRange In dc.Content.Find(regex)
                    ' The frame whose content starts exactly where the match starts; Nothing when there is no such frame.
                    Dim ef As ElementFrame = dp.GetElementFrames().FirstOrDefault(Function(e) content.Start.Equals(e.Content.Start))

                    ' The match is expected to sit two levels below its paragraph.
                    ' TryCast yields Nothing when that assumption does not hold, so it must be checked.
                    Dim paragraph As Paragraph = TryCast(content.Start.Parent.Parent, Paragraph)

                    ' Prefer the whole paragraph text; fall back to the matched text itself
                    ' when the containing paragraph could not be determined.
                    Dim text As String
                    If paragraph IsNot Nothing Then
                        text = paragraph.Content.ToString().Trim()
                    Else
                        text = content.ToString().Trim()
                    End If
                    Console.WriteLine("Filename: " & file & vbCrLf & text)

                    ' The coordinates of the found word (only when a matching frame exists).
                    If ef IsNot Nothing Then
                        Console.WriteLine("Info:" & ef.Bounds.ToString())
                    Else
                        Console.WriteLine("Info: element frame not found")
                    End If

                    Console.WriteLine("Next paragraph?")
                    Console.ReadKey()
                Next content
            Next file
        End Sub
    End Class
End Namespace
Если вам нужен пример кода или у вас есть вопрос, напишите нам по адресу [email protected], спросите в онлайн-чате (правый нижний угол этой страницы) или используйте форму ниже: