Как преобразовать PDF в HTML в многопоточном режиме на C# и .NET


Полный код

using System;
using System.IO;
using System.Collections.Generic;
using System.Threading;
using SautinSoft;

namespace Sample
{
    /// <summary>
    /// Sample that converts every PDF file found in a directory to HTML,
    /// running one conversion thread per file.
    /// </summary>
    class Sample
    {
        static void Main(string[] args)
        {
            // Before starting, we recommend to get a free 100-day key:
            // https://sautinsoft.com/start-for-free/

            // Apply the key here:
            // SautinSoft.PdfFocus.SetLicense("...");

            ConvertPdfToHtmlInThread();
        }

        /// <summary>
        /// Work item handed to a conversion thread.
        /// </summary>
        public class TArgument
        {
            /// <summary>Path of the PDF file to read.</summary>
            public string PdfFile { get; set; }

            /// <summary>Path of the HTML file to write.</summary>
            public string HtmlFile { get; set; }

            /// <summary>
            /// Number of the single page to convert. Values outside
            /// 1..PageCount fall back to page 1 in <see cref="ConvertToHtml"/>.
            /// </summary>
            public int PageNumber { get; set; }
        }

        /// <summary>
        /// Starts one thread per "*.pdf" file found three levels above the
        /// working directory, waits for all of them to finish and then opens
        /// the "HTML results" output folder.
        /// </summary>
        public static void ConvertPdfToHtmlInThread()
        {
            string pdfDir = Path.GetFullPath(@"..\..\..\");
            string[] pdfFiles = Directory.GetFiles(pdfDir, "*.pdf");

            DirectoryInfo htmlDir = new DirectoryInfo("HTML results");
            if (!htmlDir.Exists)
            {
                htmlDir.Create();
            }

            List<Thread> threads = new List<Thread>(pdfFiles.Length);
            foreach (string pdfFile in pdfFiles)
            {
                TArgument targ = new TArgument()
                {
                    PdfFile = pdfFile,
                    HtmlFile = Path.Combine(htmlDir.FullName, Path.GetFileNameWithoutExtension(pdfFile) + ".html"),
                    PageNumber = 1
                };

                // ConvertToHtml(object) matches ParameterizedThreadStart,
                // so the work item is passed through Thread.Start(object).
                Thread t = new Thread(ConvertToHtml);
                t.Start(targ);
                threads.Add(t);
            }

            // Wait until every conversion has finished before reporting.
            foreach (Thread thread in threads)
            {
                thread.Join();
            }

            Console.WriteLine("Done!");

            // Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlDir.FullName) { UseShellExecute = true });
        }

        /// <summary>
        /// Thread entry point: converts one page of a PDF file to HTML and
        /// prints a "Done!" or "Error!" line for that file.
        /// </summary>
        /// <param name="targ">
        /// A <see cref="TArgument"/> instance describing the input file,
        /// the output file and the page to convert.
        /// </param>
        public static void ConvertToHtml(object targ)
        {
            TArgument targum = (TArgument)targ;
            string pdfFile = targum.PdfFile;
            string htmlFile = targum.HtmlFile;
            int page = targum.PageNumber;

            bool done = false;

            try
            {
                SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

                f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto;
                f.HtmlOptions.IncludeImageInHtml = false;
                f.HtmlOptions.ImageSubFolder = $"{Path.GetFileNameWithoutExtension(pdfFile)}_images";
                f.HtmlOptions.Title = $"This document was produced from {Path.GetFileName(pdfFile)}.";
                f.HtmlOptions.ImageFileName = "picture";

                f.OpenPdf(pdfFile);

                if (f.PageCount > 0)
                {
                    try
                    {
                        // The last page (page == PageCount) is a valid request;
                        // only numbers outside 1..PageCount fall back to page 1.
                        if (page < 1 || page > f.PageCount)
                        {
                            page = 1;
                        }

                        // A zero result from ToHtml is treated as success.
                        done = f.ToHtml(htmlFile, page, page) == 0;
                    }
                    finally
                    {
                        // Close the document even when the conversion throws.
                        f.ClosePdf();
                    }
                }
            }
            catch (Exception ex)
            {
                // An unhandled exception on a worker thread would terminate the
                // whole process and abort the other conversions; report it and
                // let this file be listed as failed instead.
                Console.Error.WriteLine("{0}\t - {1}", Path.GetFileName(pdfFile), ex.Message);
            }

            if (done)
            {
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
            }
            else
            {
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
            }
        }
    }
}

Download

Option Infer On

Imports Microsoft.VisualBasic
Imports System
Imports System.IO
Imports System.Collections.Generic
Imports System.Threading
Imports SautinSoft

Namespace Sample
    ''' <summary>
    ''' Sample that converts every PDF file found in a directory to HTML,
    ''' running one conversion thread per file.
    ''' </summary>
    Friend Class Sample
        Shared Sub Main(ByVal args() As String)
            ' Before starting, we recommend to get a free 100-day key:
            ' https://sautinsoft.com/start-for-free/

            ' Apply the key here:
            ' SautinSoft.PdfFocus.SetLicense("...")

            ConvertPdfToHtmlInThread()
        End Sub

        ''' <summary>
        ''' Work item handed to a conversion thread.
        ''' </summary>
        Public Class TArgument
            ''' <summary>Path of the PDF file to read.</summary>
            Public Property PdfFile() As String

            ''' <summary>Path of the HTML file to write.</summary>
            Public Property HtmlFile() As String

            ''' <summary>
            ''' Number of the single page to convert. Values outside
            ''' 1..PageCount fall back to page 1 in ConvertToHtml.
            ''' </summary>
            Public Property PageNumber() As Integer
        End Class

        ''' <summary>
        ''' Starts one thread per "*.pdf" file found three levels above the
        ''' working directory, waits for all of them to finish and then opens
        ''' the "HTML results" output folder.
        ''' </summary>
        Public Shared Sub ConvertPdfToHtmlInThread()
            Dim pdfDir As String = Path.GetFullPath("..\..\..\")
            Dim pdfFiles() As String = Directory.GetFiles(pdfDir, "*.pdf")

            Dim htmlDir As New DirectoryInfo("HTML results")
            If Not htmlDir.Exists Then
                htmlDir.Create()
            End If

            Dim threads As New List(Of Thread)(pdfFiles.Length)
            For Each pdfFile As String In pdfFiles
                Dim targ As New TArgument() With {
                    .PdfFile = pdfFile,
                    .HtmlFile = Path.Combine(htmlDir.FullName, Path.GetFileNameWithoutExtension(pdfFile) & ".html"),
                    .PageNumber = 1
                }

                ' ConvertToHtml(Object) matches ParameterizedThreadStart,
                ' so the work item is passed through Thread.Start(Object).
                Dim t As New Thread(AddressOf ConvertToHtml)
                t.Start(targ)
                threads.Add(t)
            Next pdfFile

            ' Wait until every conversion has finished before reporting.
            For Each worker As Thread In threads
                worker.Join()
            Next worker

            Console.WriteLine("Done!")

            ' Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(htmlDir.FullName) With {.UseShellExecute = True})
        End Sub

        ''' <summary>
        ''' Thread entry point: converts one page of a PDF file to HTML and
        ''' prints a "Done!" or "Error!" line for that file.
        ''' </summary>
        ''' <param name="targ">
        ''' A TArgument instance describing the input file, the output file
        ''' and the page to convert.
        ''' </param>
        Public Shared Sub ConvertToHtml(ByVal targ As Object)
            Dim targum As TArgument = DirectCast(targ, TArgument)
            Dim pdfFile As String = targum.PdfFile
            Dim htmlFile As String = targum.HtmlFile
            Dim page As Integer = targum.PageNumber

            Dim done As Boolean = False

            Try
                Dim f As New SautinSoft.PdfFocus()

                f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto
                f.HtmlOptions.IncludeImageInHtml = False
                f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile))
                f.HtmlOptions.Title = String.Format("This document was produced from {0}.", Path.GetFileName(pdfFile))
                f.HtmlOptions.ImageFileName = "picture"

                f.OpenPdf(pdfFile)

                If f.PageCount > 0 Then
                    Try
                        ' The last page (page = PageCount) is a valid request;
                        ' only numbers outside 1..PageCount fall back to page 1.
                        If page < 1 OrElse page > f.PageCount Then
                            page = 1
                        End If

                        ' A zero result from ToHtml is treated as success.
                        done = (f.ToHtml(htmlFile, page, page) = 0)
                    Finally
                        ' Close the document even when the conversion throws.
                        f.ClosePdf()
                    End Try
                End If
            Catch ex As Exception
                ' An unhandled exception on a worker thread would terminate the
                ' whole process and abort the other conversions; report it and
                ' let this file be listed as failed instead.
                Console.Error.WriteLine("{0}" & vbTab & " - {1}", Path.GetFileName(pdfFile), ex.Message)
            End Try

            If done Then
                Console.WriteLine("{0}" & vbTab & " - Done!", Path.GetFileName(pdfFile))
            Else
                Console.WriteLine("{0}" & vbTab & " - Error!", Path.GetFileName(pdfFile))
            End If
        End Sub
    End Class
End Namespace

Download


Если вам нужен пример кода или у вас есть вопрос, напишите нам по адресу support@sautinsoft.com, задайте вопрос в онлайн-чате (в правом нижнем углу этой страницы) или воспользуйтесь формой ниже:



Вопросы и предложения всегда приветствуются!

Мы разрабатываем компоненты для .NET с 2002 года. Мы хорошо знаем форматы PDF, DOCX, RTF, HTML, XLSX и форматы изображений. Если вам нужна помощь в создании, изменении или преобразовании документов в различных форматах, мы можем вам помочь. Мы напишем для вас любой пример кода абсолютно бесплатно.