Как преобразовать PDF в текст в многопоточном режиме на C# и .NET
Полный код
using System;
using System.IO;
using System.Collections.Generic;
using System.Threading;
using SautinSoft;
namespace Sample
{
class Sample
{
/// <summary>
/// Entry point: runs the multi-threaded PDF-to-text conversion sample.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // A free 100-day key is available at:
    //   https://sautinsoft.com/start-for-free/
    // To apply it, uncomment the next line and paste the key:
    // SautinSoft.PdfFocus.SetLicense("...");
    ConvertPdfToTextInThread();
}
/// <summary>
/// Work item handed to a conversion thread: which PDF to read and
/// which page of it to convert.
/// </summary>
public class TArgument
{
    /// <summary>Path of the PDF file to convert.</summary>
    public string PdfFile { get; set; }

    /// <summary>Number of the page to convert (the sample always passes 1).</summary>
    public int PageNumber { get; set; }
}
/// <summary>
/// Finds every <c>*.pdf</c> file in the directory three levels above the
/// current working directory, converts page 1 of each on its own thread,
/// and blocks until all threads have finished.
/// </summary>
public static void ConvertPdfToTextInThread()
{
    // Build the relative path from segments instead of a literal containing
    // backslashes: '\' is a directory separator only on Windows, so the
    // literal form does not resolve on other platforms.
    string pdfs = Path.GetFullPath(Path.Combine("..", "..", ".."));
    string[] files = Directory.GetFiles(pdfs, "*.pdf");

    // One dedicated thread per PDF; keep the handles so we can join them below.
    List<Thread> threads = new List<Thread>(files.Length);
    foreach (string pdfPath in files)
    {
        TArgument targ = new TArgument
        {
            PdfFile = pdfPath,
            PageNumber = 1
        };

        // ConvertToText(object) matches ParameterizedThreadStart, so the
        // argument passed to Start() is delivered to it unchanged.
        Thread worker = new Thread(ConvertToText);
        worker.Start(targ);
        threads.Add(worker);
    }

    // Wait for every conversion before reporting overall completion.
    foreach (Thread worker in threads)
    {
        worker.Join();
    }

    Console.WriteLine("Done!");
}
/// <summary>
/// Thread entry point: converts one page of a PDF file to a <c>.txt</c> file
/// and, on success, opens the result through the operating-system shell.
/// </summary>
/// <param name="targ">
/// A <see cref="TArgument"/> carrying the PDF path and the page number.
/// Typed as <see cref="object"/> so the method can be used as a thread start routine.
/// </param>
/// <exception cref="InvalidCastException">
/// <paramref name="targ"/> is not a <see cref="TArgument"/>.
/// </exception>
public static void ConvertToText(object targ)
{
    TArgument targum = (TArgument)targ;
    string pdfFile = targum.PdfFile;
    int page = targum.PageNumber;

    // Only the file name is kept, so the output path is relative and the
    // text file is created in the current working directory.
    string textFile = Path.GetFileNameWithoutExtension(pdfFile) + ".txt";

    SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
    f.WordOptions.ShowInvisibleText = true;
    f.OpenPdf(pdfFile);

    bool done = false;
    if (f.PageCount > 0)
    {
        try
        {
            // Fall back to the first page only when the requested page is
            // really out of range. The previous check (page >= PageCount)
            // also rejected the last page, which is a valid request.
            if (page < 1 || page > f.PageCount)
            {
                page = 1;
            }

            // A return value of 0 is treated as success.
            done = f.ToText(textFile, page, page) == 0;
        }
        finally
        {
            // Release the document even if the conversion throws.
            f.ClosePdf();
        }
    }

    if (done)
    {
        Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
        // UseShellExecute lets the OS pick the default application for .txt files.
        System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(textFile) { UseShellExecute = true });
    }
    else
    {
        Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
    }
}
}
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports System.Threading
Imports SautinSoft
Module Sample
''' <summary>
''' Entry point: runs the multi-threaded PDF-to-text conversion sample.
''' </summary>
Sub Main()
    ' A free 100-day key is available at:
    '   https://sautinsoft.com/start-for-free/
    ' To apply it, uncomment the next line and paste the key:
    ' SautinSoft.PdfFocus.SetLicense("...")
    ConvertPdfToTextInThread()
End Sub
''' <summary>
''' Work item handed to a conversion thread: which PDF to read and
''' which page of it to convert.
''' </summary>
Public Class TArgument
    ''' <summary>Path of the PDF file to convert.</summary>
    Public Property PdfFile As String

    ''' <summary>Number of the page to convert (the sample always passes 1).</summary>
    Public Property PageNumber As Integer
End Class
''' <summary>
''' Finds every *.pdf file in the directory three levels above the current
''' working directory, converts page 1 of each on its own thread, and
''' blocks until all threads have finished.
''' </summary>
Public Sub ConvertPdfToTextInThread()
    ' Build the relative path from segments instead of a literal containing
    ' backslashes: "\" is a directory separator only on Windows, so the
    ' literal form does not resolve on other platforms.
    Dim pdfs As String = Path.GetFullPath(Path.Combine("..", "..", ".."))
    Dim files() As String = Directory.GetFiles(pdfs, "*.pdf")

    ' One dedicated thread per PDF; keep the handles so we can join them below.
    Dim threads As New List(Of Thread)(files.Length)
    For Each pdfPath As String In files
        Dim targ As New TArgument() With {
            .PdfFile = pdfPath,
            .PageNumber = 1
        }

        ' The argument passed to Start() is delivered to ConvertToText unchanged.
        Dim worker As New Thread(New ParameterizedThreadStart(AddressOf ConvertToText))
        worker.Start(targ)
        threads.Add(worker)
    Next pdfPath

    ' Wait for every conversion before reporting overall completion.
    For Each worker As Thread In threads
        worker.Join()
    Next worker

    Console.WriteLine("Done!")
End Sub
''' <summary>
''' Thread entry point: converts one page of a PDF file to a .txt file and,
''' on success, opens the result through the operating-system shell.
''' </summary>
''' <param name="targ">
''' A <see cref="TArgument"/> carrying the PDF path and the page number.
''' Typed as Object so the method can be used as a thread start routine.
''' </param>
''' <exception cref="InvalidCastException">
''' <paramref name="targ"/> is not a <see cref="TArgument"/>.
''' </exception>
Public Sub ConvertToText(ByVal targ As Object)
    Dim targum As TArgument = DirectCast(targ, TArgument)
    Dim pdfFile As String = targum.PdfFile
    Dim page As Integer = targum.PageNumber

    ' Only the file name is kept, so the output path is relative and the
    ' text file is created in the current working directory.
    Dim textFile As String = Path.GetFileNameWithoutExtension(pdfFile) + ".txt"

    Dim f As New SautinSoft.PdfFocus()
    f.WordOptions.ShowInvisibleText = True
    f.OpenPdf(pdfFile)

    Dim done As Boolean = False
    If f.PageCount > 0 Then
        Try
            ' Fall back to the first page only when the requested page is
            ' really out of range. The previous check (page >= PageCount)
            ' also rejected the last page, which is a valid request.
            If page < 1 OrElse page > f.PageCount Then
                page = 1
            End If

            ' A return value of 0 is treated as success.
            done = (f.ToText(textFile, page, page) = 0)
        Finally
            ' Release the document even if the conversion throws.
            f.ClosePdf()
        End Try
    End If

    If done Then
        Console.WriteLine("{0}" & ControlChars.Tab & " - Done!", Path.GetFileName(pdfFile))
        ' UseShellExecute lets the OS pick the default application for .txt files.
        System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(textFile) With {.UseShellExecute = True})
    Else
        Console.WriteLine("{0}" & ControlChars.Tab & " - Error!", Path.GetFileName(pdfFile))
    End If
End Sub
End Module
Если вам нужен пример кода или у вас есть вопрос, напишите нам по адресу support@sautinsoft.com, спросите в онлайн-чате (правый нижний угол этой страницы) или воспользуйтесь формой ниже: