Здесь мы покажем вам, как использовать полнотекстовый поиск в определенном каталоге, включая
подкаталоги.
Используя регулярные выражения, мы найдем заданное слово без учета регистра (в примере кода ниже это
"with": with, WITH, WiTh и т. д.) во всех файлах (DOCX, RTF, PDF и HTML) внутри указанного каталога
и выведем результаты на консоль.
Полный код
using System;
using System.IO;
using System.Collections.Generic;
using SautinSoft.Document;
using System.Drawing;
using System.Drawing.Imaging;
using System.Linq;
using System.Text.RegularExpressions;
namespace Sample
{
    /// <summary>
    /// Console sample that runs a full text search over the documents of a directory tree.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Entry point: searches the sample "searching" folder for the word "with".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Get your free 100-day key here:
            // https://sautinsoft.com/start-for-free/
            string searchDir = Path.GetFullPath(@"..\..\..\searching\");
            string searchText = "with";
            FullTextSearching(searchDir, searchText);
        }

        /// <summary>
        /// This sample shows how to launch full text search in the specific directory.
        /// </summary>
        /// <param name="searchPath">
        /// Directory to search, including its subdirectories. May be relative and may
        /// omit the trailing directory separator.
        /// </param>
        /// <param name="searchText">
        /// Word to look for, matched case-insensitively between word boundaries.
        /// The value is inserted into the regular expression as-is, so regex
        /// metacharacters in it are interpreted as pattern syntax.
        /// </param>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/full-text-searching-in-documents-net-csharp-vb.php
        /// </remarks>
        public static void FullTextSearching(string searchPath, string searchText)
        {
            DirectoryInfo searchDir = new DirectoryInfo(searchPath);

            // Normalized root (no trailing separator). Display names are derived from it
            // rather than from the raw searchPath argument, because the enumerated file
            // paths are rooted at searchDir.FullName, not at searchPath.
            char[] separators = { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar };
            string rootPath = searchDir.FullName.TrimEnd(separators);

            List<string> supportedFiles = new List<string>();

            // 1. Find the files to search in.
            // Restrict the search to *.docx, *.rtf, *.pdf and *.html files,
            // including subdirectories.
            foreach (string file in Directory.GetFiles(searchDir.FullName, "*.*", SearchOption.AllDirectories))
            {
                // Invariant lower-casing keeps the extension check independent of the current culture.
                string ext = Path.GetExtension(file).ToLowerInvariant();
                if (ext == ".docx" || ext == ".pdf" || ext == ".html" || ext == ".rtf")
                {
                    supportedFiles.Add(file);
                }
            }

            // 2. Search every file in a loop and count how many times it contains the search text.
            Console.WriteLine($"The results for \"{searchText}\":");

            // The pattern does not depend on the file, so it is built once.
            Regex regex = new Regex($"\\b({searchText})\\b", RegexOptions.IgnoreCase);

            int totalFiles = 0, totalMatches = 0;
            foreach (string file in supportedFiles)
            {
                DocumentCore dc = DocumentCore.Load(file);
                totalFiles++;

                // Show the path relative to the search root: just the file name for files
                // in the root folder, "subfolder<separator>name" for files in subfolders.
                string fileName = file.StartsWith(rootPath, StringComparison.Ordinal)
                    ? file.Substring(rootPath.Length).TrimStart(separators)
                    : file;

                int matches = dc.Content.Find(regex).Count();
                totalMatches += matches;
                Console.WriteLine($"{totalFiles:D3} from {supportedFiles.Count} {fileName} - {matches} matches.");
            }

            Console.WriteLine($"\nSearching finished. {supportedFiles.Count} file(s) has been processed. Total matches: {totalMatches}.");
            Console.WriteLine("Press any key ...");
            Console.ReadKey();
        }
    }
}
Imports System
Imports System.IO
Imports System.Collections.Generic
Imports SautinSoft.Document
Imports System.Drawing
Imports System.Linq
Imports System.Text.RegularExpressions
Namespace Sample
    ''' <summary>
    ''' Console sample that runs a full text search over the documents of a directory tree.
    ''' </summary>
    Friend Class Sample
        ''' <summary>
        ''' Entry point: searches the sample "searching" folder for the word "with".
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Shared Sub Main(ByVal args() As String)
            ' Get your free 100-day key here:
            ' https://sautinsoft.com/start-for-free/
            Dim searchDir As String = Path.GetFullPath("..\..\..\searching\")
            Dim searchText As String = "with"
            FullTextSearching(searchDir, searchText)
        End Sub

        ''' <summary>
        ''' This sample shows how to launch full text search in the specific directory.
        ''' </summary>
        ''' <param name="searchPath">
        ''' Directory to search, including its subdirectories. May be relative and may
        ''' omit the trailing directory separator.
        ''' </param>
        ''' <param name="searchText">
        ''' Word to look for, matched case-insensitively between word boundaries.
        ''' The value is inserted into the regular expression as-is, so regex
        ''' metacharacters in it are interpreted as pattern syntax.
        ''' </param>
        ''' <remarks>
        ''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/full-text-searching-in-documents-net-csharp-vb.php
        ''' </remarks>
        Public Shared Sub FullTextSearching(ByVal searchPath As String, ByVal searchText As String)
            Dim searchDir As New DirectoryInfo(searchPath)

            ' Normalized root (no trailing separator). Display names are derived from it
            ' rather than from the raw searchPath argument, because the enumerated file
            ' paths are rooted at searchDir.FullName, not at searchPath.
            Dim separators As Char() = {Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar}
            Dim rootPath As String = searchDir.FullName.TrimEnd(separators)

            Dim supportedFiles As New List(Of String)()

            ' 1. Find the files to search in.
            ' Restrict the search to *.docx, *.rtf, *.pdf and *.html files,
            ' including subdirectories.
            For Each file As String In Directory.GetFiles(searchDir.FullName, "*.*", SearchOption.AllDirectories)
                ' Invariant lower-casing keeps the extension check independent of the current culture.
                Dim ext As String = Path.GetExtension(file).ToLowerInvariant()
                If ext = ".docx" OrElse ext = ".pdf" OrElse ext = ".html" OrElse ext = ".rtf" Then
                    supportedFiles.Add(file)
                End If
            Next file

            ' 2. Search every file in a loop and count how many times it contains the search text.
            Console.WriteLine($"The results for ""{searchText}"":")

            ' The pattern does not depend on the file, so it is built once.
            Dim regex As New Regex($"\b({searchText})\b", RegexOptions.IgnoreCase)

            Dim totalFiles As Integer = 0, totalMatches As Integer = 0
            For Each file As String In supportedFiles
                Dim dc As DocumentCore = DocumentCore.Load(file)
                totalFiles += 1

                ' Show the path relative to the search root: just the file name for files
                ' in the root folder, "subfolder<separator>name" for files in subfolders.
                Dim fileName As String
                If file.StartsWith(rootPath, StringComparison.Ordinal) Then
                    fileName = file.Substring(rootPath.Length).TrimStart(separators)
                Else
                    fileName = file
                End If

                Dim matches As Integer = dc.Content.Find(regex).Count()
                totalMatches += matches
                Console.WriteLine($"{totalFiles:D3} from {supportedFiles.Count} {fileName} - {matches} matches.")
            Next file

            Console.WriteLine($"Searching finished. {supportedFiles.Count} file(s) has been processed. Total matches: {totalMatches}.")
            Console.WriteLine("Press any key ...")
            Console.ReadKey()
        End Sub
    End Class
End Namespace
Если вам нужен пример кода или у вас есть вопрос, напишите нам по адресу support@sautinsoft.com, спросите в онлайн-чате (правый нижний угол этой страницы) или используйте форму ниже: