Как показать абзац, содержащий нужное слово, с помощью C# и .NET


    Бывают случаи, когда по ключевым словам нужно найти абзацы, в которых эти слова встречаются. Такие текстовые данные могут быть сохранены в форматах PDF, DOCX или RTF.

В этом примере кода мы выведем на консоль все абзацы, в которых слово "company" встречается как отдельное целое слово (без учёта регистра).

Полный код

using System;
using System.IO;
using SautinSoft.Document;
using SautinSoft.Document.Drawing;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace Example
{
    class Program
    {
        /// <summary>
        /// Entry point: runs the paragraph search sample.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            FindWordInParagraph();
        }

        /// <summary>
        /// Searches every PDF file under the "..\..\files\" folder (subfolders included) for the
        /// whole word "company" (case-insensitive). For each match it prints the file name,
        /// the text of the paragraph that contains the match and the bounds of the element frame
        /// that starts at the match, then waits for a key press.
        /// You may change the extension: pdf, docx, rtf.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-show-paragraph-containing-required-word-in-csharp-vb-net.php
        /// </remarks>
        static void FindWordInParagraph()
        {
            // \b...\b restricts the match to the whole word, so "companyName" is not reported.
            Regex regex = new Regex(@"\bcompany\b", RegexOptions.IgnoreCase);

            // Loop through all PDF files in a directory.
            foreach (string file in Directory.EnumerateFiles(@"..\..\files\", "*.pdf", SearchOption.AllDirectories))
            {
                DocumentCore dc = DocumentCore.Load(file);

                // Provides a functionality to paginate the document content.
                DocumentPaginator dp = dc.GetPaginator();

                // The element frames do not depend on the current match, so collect them
                // once per document instead of re-evaluating them for every found word.
                IEnumerable<ElementFrame> frames = dp.GetElementFrames().ToList();

                foreach (ContentRange content in dc.Content.Find(regex))
                {
                    // The grandparent of the match start is expected to be the paragraph.
                    // When it is some other element, fall back to the matched text itself
                    // instead of dereferencing a null reference.
                    Paragraph paragraph = content.Start.Parent.Parent as Paragraph;
                    string paragraphText;
                    if (paragraph != null)
                    {
                        paragraphText = paragraph.Content.ToString().Trim();
                    }
                    else
                    {
                        paragraphText = content.ToString().Trim();
                    }
                    Console.WriteLine("Filename: " + file + "\r\n" + paragraphText);

                    // The coordinates of the found word. There may be no frame that starts
                    // exactly at the match position, in which case FirstOrDefault returns null.
                    ElementFrame ef = frames.FirstOrDefault(e => content.Start.Equals(e.Content.Start));
                    if (ef != null)
                    {
                        Console.WriteLine("Info:" + ef.Bounds.ToString());
                    }
                    else
                    {
                        Console.WriteLine("Info: element frame not found");
                    }

                    Console.WriteLine("Next paragraph?");
                    Console.ReadKey();
                }
            }
        }
    }
}

Скачать

Imports Microsoft.VisualBasic
Imports System
Imports System.IO
Imports SautinSoft.Document
Imports SautinSoft.Document.Drawing
Imports System.Collections.Generic
Imports System.Linq
Imports System.Text
Imports System.Text.RegularExpressions

Namespace Example
	Friend Class Program
		''' <summary>
		''' Entry point: runs the paragraph search sample.
		''' </summary>
		''' <param name="args">Command-line arguments (not used).</param>
		Shared Sub Main(ByVal args() As String)
			FindWordInParagraph()
		End Sub

		''' <summary>
		''' Searches every PDF file under the "..\files\" folder (subfolders included) for the
		''' whole word "company" (case-insensitive). For each match it prints the file name,
		''' the text of the paragraph that contains the match and the bounds of the element frame
		''' that starts at the match, then waits for a key press.
		''' You may change the extension: pdf, docx, rtf.
		''' </summary>
		''' <remarks>
		''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-show-paragraph-containing-required-word-in-csharp-vb-net.php
		''' </remarks>
		Private Shared Sub FindWordInParagraph()
			' \b...\b restricts the match to the whole word, so "companyName" is not reported.
			Dim regex As New Regex("\bcompany\b", RegexOptions.IgnoreCase)

			' Loop through all PDF files in a directory.
			For Each file As String In Directory.EnumerateFiles("..\files\", "*.pdf", SearchOption.AllDirectories)
				Dim dc As DocumentCore = DocumentCore.Load(file)

				' Provides a functionality to paginate the document content.
				Dim dp As DocumentPaginator = dc.GetPaginator()

				' The element frames do not depend on the current match, so collect them
				' once per document instead of re-evaluating them for every found word.
				Dim frames As IEnumerable(Of ElementFrame) = dp.GetElementFrames().ToList()

				For Each content As ContentRange In dc.Content.Find(regex)
					' The grandparent of the match start is expected to be the paragraph.
					' When it is some other element, fall back to the matched text itself
					' instead of dereferencing Nothing.
					Dim paragraph As Paragraph = TryCast(content.Start.Parent.Parent, Paragraph)
					Dim paragraphText As String
					If paragraph IsNot Nothing Then
						paragraphText = paragraph.Content.ToString().Trim()
					Else
						paragraphText = content.ToString().Trim()
					End If
					Console.WriteLine("Filename: " & file & vbCrLf & paragraphText)

					' The coordinates of the found word. There may be no frame that starts
					' exactly at the match position, in which case FirstOrDefault returns Nothing.
					Dim ef As ElementFrame = frames.FirstOrDefault(Function(e) content.Start.Equals(e.Content.Start))
					If ef IsNot Nothing Then
						Console.WriteLine("Info:" & ef.Bounds.ToString())
					Else
						Console.WriteLine("Info: element frame not found")
					End If

					Console.WriteLine("Next paragraph?")
					Console.ReadKey()
				Next content
			Next file
		End Sub
	End Class
End Namespace

Скачать


Если вам нужен пример кода или у вас есть вопрос: напишите нам по адресу support@sautinsoft.ru или спросите в онлайн-чате (правый нижний угол этой страницы) или используйте форму ниже:



Вопросы и предложения всегда приветствуются!

Мы разрабатываем компоненты .NET с 2002 года. Мы хорошо знаем форматы PDF, DOCX, RTF, HTML, XLSX и форматы изображений. Если вам нужна помощь в создании, изменении или преобразовании документов в различных форматах, мы можем вам помочь. Мы напишем для вас любой пример кода абсолютно бесплатно.