Как преобразовать PDF в Excel в многопоточном режиме на C# и .NET
Полный код
using System;
using System.IO;
using System.Collections.Generic;
using System.Threading;
using SautinSoft;
namespace Sample
{
class Sample
{
/// <summary>
/// Program entry point: runs the multi-threaded PDF-to-Excel conversion demo.
/// </summary>
/// <remarks>
/// Licensing is optional. A free 100-day key can be requested at
/// https://sautinsoft.com/start-for-free/ and applied before the conversion
/// starts by calling: SautinSoft.PdfFocus.SetLicense("...");
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args) => ConvertPdfToExcelInThread();
/// <summary>
/// Work item handed to a conversion thread: which PDF to convert and which page.
/// </summary>
public class TArgument
{
    /// <summary>Path of the PDF file to convert.</summary>
    public string PdfFile { get; set; }

    /// <summary>Number of the page to convert.</summary>
    public int PageNumber { get; set; }
}
/// <summary>
/// Converts every "*.pdf" file found three directory levels above the current
/// working directory, starting one thread per file, and waits for all threads
/// to finish before printing "Done!".
/// </summary>
public static void ConvertPdfToExcelInThread()
{
    // Build the relative path with Path.Combine instead of a literal containing
    // backslashes: on Unix-like systems '\' is an ordinary file-name character,
    // so @"..\..\..\" would not walk up three directories there.
    string pdfs = Path.GetFullPath(Path.Combine("..", "..", ".."));
    string[] files = Directory.GetFiles(pdfs, "*.pdf");

    List<Thread> threads = new List<Thread>(files.Length);
    foreach (string pdfPath in files)
    {
        TArgument targ = new TArgument()
        {
            PdfFile = pdfPath,
            // This sample requests the first page of each document.
            PageNumber = 1
        };

        var worker = new Thread(ConvertToExcel);
        worker.Start(targ);
        threads.Add(worker);
    }

    // Block until every worker thread has finished.
    foreach (var thread in threads)
    {
        thread.Join();
    }

    Console.WriteLine("Done!");
}
/// <summary>
/// Thread entry point: converts a single page of a PDF file to an XLSX workbook
/// and, on success, opens the result via the operating-system shell.
/// </summary>
/// <param name="targ">
/// A <see cref="TArgument"/> carrying the PDF path and the page number to convert.
/// </param>
public static void ConvertToExcel(object targ)
{
    TArgument targum = (TArgument)targ;
    string pdfFile = targum.PdfFile;
    int page = targum.PageNumber;

    // Relative file name, so the workbook is created in the current working directory.
    string excelFile = Path.GetFileNameWithoutExtension(pdfFile) + ".xlsx";

    SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

    // Output format: XLSX (modern Excel). Switch to Format.Xls for Excel 97-2003.
    f.ExcelOptions.Format = SautinSoft.PdfFocus.Format.Xlsx;
    // f.ExcelOptions.Format = SautinSoft.PdfFocus.Format.Xls;

    // 'true'  = convert all data to the spreadsheet (tabular and even textual).
    // 'false' = skip textual data and convert only tables.
    f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = true;

    // 'true'  = preserve the original page layout.
    // 'false' = place tables before text.
    f.ExcelOptions.PreservePageLayout = true;

    // Culture used by the converter: en-US, but with ',' as the decimal
    // separator and '.' as the group separator.
    System.Globalization.CultureInfo ci = new System.Globalization.CultureInfo("en-US");
    ci.NumberFormat.NumberDecimalSeparator = ",";
    ci.NumberFormat.NumberGroupSeparator = ".";
    f.ExcelOptions.CultureInfo = ci;

    f.OpenPdf(pdfFile);

    bool done = false;
    if (f.PageCount > 0)
    {
        try
        {
            // Fall back to the first page when the requested page lies outside
            // 1..PageCount. The upper bound uses '>' (not '>=') so that the
            // last page of the document remains selectable.
            if (page < 1 || page > f.PageCount)
            {
                page = 1;
            }

            // A return value of 0 is treated as success.
            done = f.ToExcel(excelFile, page, page) == 0;
        }
        finally
        {
            // Release the document even if the conversion throws.
            f.ClosePdf();
        }
    }

    if (done)
    {
        Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
        // Open the produced workbook with the default associated application.
        System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(excelFile) { UseShellExecute = true });
    }
    else
    {
        Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
    }
}
}
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports System.Threading
Imports SautinSoft
Module Sample
''' <summary>
''' Program entry point: runs the multi-threaded PDF-to-Excel conversion demo.
''' </summary>
Sub Main()
    ' Licensing is optional. A free 100-day key can be requested at
    ' https://sautinsoft.com/start-for-free/
    ' To apply it, uncomment the next line and insert the key:
    ' SautinSoft.PdfFocus.SetLicense("...")

    ConvertPdfToExcelInThread()
End Sub
''' <summary>
''' Work item handed to a conversion thread: which PDF to convert and which page.
''' </summary>
Public Class TArgument
    ''' <summary>Path of the PDF file to convert.</summary>
    Public Property PdfFile As String

    ''' <summary>Number of the page to convert.</summary>
    Public Property PageNumber As Integer
End Class
''' <summary>
''' Converts every "*.pdf" file found three directory levels above the current
''' working directory, starting one thread per file, and waits for all threads
''' to finish before printing "Done!".
''' </summary>
Public Sub ConvertPdfToExcelInThread()
    ' Build the relative path with Path.Combine instead of a literal containing
    ' backslashes: on Unix-like systems '\' is an ordinary file-name character,
    ' so "..\..\..\" would not walk up three directories there.
    Dim pdfs As String = Path.GetFullPath(Path.Combine("..", "..", ".."))
    Dim files() As String = Directory.GetFiles(pdfs, "*.pdf")

    Dim threads As New List(Of Thread)(files.Length)
    For Each pdfPath As String In files
        ' This sample requests the first page of each document.
        Dim targ As New TArgument() With {
            .PdfFile = pdfPath,
            .PageNumber = 1
        }

        Dim worker As New Thread(Sub(a) ConvertToExcel(a))
        worker.Start(targ)
        threads.Add(worker)
    Next

    ' Block until every worker thread has finished.
    For Each worker As Thread In threads
        worker.Join()
    Next

    Console.WriteLine("Done!")
End Sub
''' <summary>
''' Thread entry point: converts a single page of a PDF file to an XLSX workbook
''' and, on success, opens the result via the operating-system shell.
''' </summary>
''' <param name="targ">
''' A <see cref="TArgument"/> carrying the PDF path and the page number to convert.
''' </param>
Public Sub ConvertToExcel(ByVal targ As Object)
    Dim targum As TArgument = DirectCast(targ, TArgument)
    Dim pdfFile As String = targum.PdfFile
    Dim page As Integer = targum.PageNumber

    ' Relative file name, so the workbook is created in the current working directory.
    Dim excelFile As String = Path.GetFileNameWithoutExtension(pdfFile) & ".xlsx"

    Dim f As New SautinSoft.PdfFocus()

    ' Output format: XLSX (modern Excel). Switch to Format.Xls for Excel 97-2003.
    f.ExcelOptions.Format = SautinSoft.PdfFocus.Format.Xlsx
    ' f.ExcelOptions.Format = SautinSoft.PdfFocus.Format.Xls

    ' 'True'  = convert all data to the spreadsheet (tabular and even textual).
    ' 'False' = skip textual data and convert only tables.
    f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = True

    ' 'True'  = preserve the original page layout.
    ' 'False' = place tables before text.
    f.ExcelOptions.PreservePageLayout = True

    ' Culture used by the converter: en-US, but with "," as the decimal
    ' separator and "." as the group separator.
    Dim ci As New System.Globalization.CultureInfo("en-US")
    ci.NumberFormat.NumberDecimalSeparator = ","
    ci.NumberFormat.NumberGroupSeparator = "."
    f.ExcelOptions.CultureInfo = ci

    f.OpenPdf(pdfFile)

    Dim done As Boolean = False
    If f.PageCount > 0 Then
        Try
            ' Fall back to the first page when the requested page lies outside
            ' 1..PageCount. The upper bound uses ">" (not ">=") so that the
            ' last page of the document remains selectable.
            If page < 1 OrElse page > f.PageCount Then
                page = 1
            End If

            ' A return value of 0 is treated as success.
            done = (f.ToExcel(excelFile, page, page) = 0)
        Finally
            ' Release the document even if the conversion throws.
            f.ClosePdf()
        End Try
    End If

    If done Then
        Console.WriteLine("{0}" & vbTab & " - Done!", Path.GetFileName(pdfFile))
        ' Open the produced workbook with the default associated application.
        System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(excelFile) With {.UseShellExecute = True})
    Else
        Console.WriteLine("{0}" & vbTab & " - Error!", Path.GetFileName(pdfFile))
    End If
End Sub
End Module
Если вам нужен пример кода или у вас есть вопрос, напишите нам по адресу support@sautinsoft.ru, задайте его в онлайн-чате (в правом нижнем углу этой страницы) или воспользуйтесь формой ниже: