горизонтально расположенные полосы: белая, синяя, красная

Как конвертировать PDF в HTML на C# и VB.NET

ASP.NET - просмотрщик PDF

using System;
using System.Collections.Generic;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using SautinSoft;

/// <summary>
/// Code-behind of the PDF viewer page. An uploaded PDF is opened with <c>PdfFocus</c>,
/// kept in the session, and rendered to HTML one page at a time.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>Session key of the 1-based number of the page currently shown.</summary>
    private const string PageSessionKey = "page";

    /// <summary>Session key of the <c>PdfFocus</c> instance that holds the uploaded document.</summary>
    private const string FocusSessionKey = "focus";

    /// <summary>
    /// Hooks the file input to the page's client-side UploadFile script and,
    /// on the first (non-postback) request, starts at page 1.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        uplPDF.Attributes["onchange"] = "UploadFile(this)";
        if (!this.IsPostBack)
        {
            Session[PageSessionKey] = 1;
        }
    }

    /// <summary>Shows the next page, if there is one.</summary>
    protected void btnNext_Click(object sender, EventArgs e)
    {
        MoveToPage(GetCurrentPage() + 1);
    }

    /// <summary>Shows the previous page, if there is one.</summary>
    protected void btnPrev_Click(object sender, EventArgs e)
    {
        MoveToPage(GetCurrentPage() - 1);
    }

    /// <summary>
    /// Opens the posted PDF, stores it in the session and shows its first page.
    /// </summary>
    protected void Upload(object sender, EventArgs e)
    {
        // Nothing was posted: do not claim success and keep whatever is currently displayed.
        if (!uplPDF.HasFile)
        {
            lblMessage.Visible = false;
            return;
        }

        PdfFocus f = new PdfFocus();
        f.OpenPdf(uplPDF.FileBytes);
        Session[FocusSessionKey] = f;

        // A new document always starts at page 1; the page number left over from a
        // previously viewed document may be larger than this document's page count.
        Session[PageSessionKey] = 1;

        lblMessage.Visible = true;
        ShowPdf();
    }

    /// <summary>
    /// Tells whether <paramref name="page"/> is a valid 1-based page number of the document in the session.
    /// </summary>
    /// <param name="page">1-based page number to check.</param>
    /// <returns><c>true</c> when a document is stored and the page lies within 1..PageCount; otherwise <c>false</c>.</returns>
    protected bool IsPageInRange(int page)
    {
        if (Session[FocusSessionKey] != null)
        {
            PdfFocus f = (PdfFocus)Session[FocusSessionKey];
            if (page > 0 && page <= f.PageCount)
                return true;
        }
        return false;
    }

    /// <summary>
    /// Reads the current page number from the session, falling back to 1 when the
    /// entry is missing (for example after the session has expired).
    /// </summary>
    private int GetCurrentPage()
    {
        object stored = Session[PageSessionKey];
        return stored is int ? (int)stored : 1;
    }

    /// <summary>Stores <paramref name="page"/> as the current page and renders it, but only when it is in range.</summary>
    private void MoveToPage(int page)
    {
        if (IsPageInRange(page))
        {
            Session[PageSessionKey] = page;
            ShowPdf();
        }
    }

    /// <summary>
    /// Converts the current page of the stored document to HTML (images embedded as PNG)
    /// and updates the literal and the "Page x of y" text box.
    /// </summary>
    private void ShowPdf()
    {
        PdfFocus f = Session[FocusSessionKey] as PdfFocus;
        if (f == null || f.PageCount <= 0)
        {
            return;
        }

        f.HtmlOptions.IncludeImageInHtml = true;
        f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Png;

        int page = GetCurrentPage();
        if (page < 1 || page > f.PageCount)
        {
            // Defensive reset: never ask the converter for a page the document does not have.
            page = 1;
            Session[PageSessionKey] = page;
        }

        string html = f.ToHtml(page, page);
        htmlLiteral.Text = html;
        txtPage.Text = String.Format("Page {0} of {1}", page, f.PageCount);
    }
}
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default" %>

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
    <head runat="server">
        <title></title>
        <link rel="stylesheet" href="/lib/bootstrap/dist/css/bootstrap.css" />
        <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
        <script>
        // Wires the "+" and "-" buttons: each click changes the CSS scale of #page-window
        // by one step and shifts its left/top margin by a matching amount.
        $(document).ready(function () {

            var zoom = 1.0;
            var zoomStep = 0.05;
            var margin = 0;
            var marginStep = 30;
            var pageWindow = $('#page-window');

            // Applies one zoom step; direction is +1 (zoom in) or -1 (zoom out).
            function applyZoomStep(direction) {
                var nextZoom = zoom + direction * zoomStep;

                // Do not shrink to a zero or negative scale: that would collapse or mirror the page.
                // Half a step is used as the threshold to absorb floating-point drift.
                if (nextZoom < zoomStep / 2) {
                    return;
                }

                zoom = nextZoom;
                margin += direction * marginStep;
                pageWindow.css({
                    transform: 'scale(' + zoom + ')',
                    "margin-left": margin,
                    "margin-top": margin
                });
            }

            $('#zoom-in').click(function () {
                applyZoomStep(1);
            });
            $('#zoom-out').click(function () {
                applyZoomStep(-1);
            });
        });
        </script>
        <script type="text/javascript">
        // Called from the file input's onchange attribute: once a file has been chosen,
        // clicks the hidden server-side upload button so the form is posted automatically.
        function UploadFile(fileUpload) {
            if (fileUpload.value != '') {
                // The server expression must be closed with "%>" so that it renders the button's client ID.
                document.getElementById("<%=btnUpload.ClientID %>").click();
            }
        }
        </script>
    </head>
    <body>
        <!-- Single server-side form: upload control, page navigation, zoom buttons and the rendered page. -->
        <form class="panel-body" id="form1" runat="server">
            <div class="panel">

                <!-- File picker; the code-behind attaches an onchange handler that calls UploadFile(this). -->
                <asp:FileUpload ID="uplPDF" runat="server" ToolTip="Please select a PDF document." /><br />
                <!-- Status message, hidden until the Upload handler makes it visible. -->
                <asp:Label ID="lblMessage" runat="server" Text="File uploaded successfully." ForeColor="Green" Visible="false" />
                <!-- Hidden button (display: none) that the UploadFile script clicks to post the selected file. -->
                <asp:Button CssClass="btn btn-primary" ID="btnUpload" Text="Upload" runat="server" OnClick="Upload" Style="display: none" />
                <!-- Page navigation; txtPage receives the "Page x of y" text from the code-behind. -->
                <asp:Button CssClass="btn btn-primary" ID="btnPrev" runat="server" Text="Prev" OnClick="btnPrev_Click" />
                <asp:TextBox ID="txtPage" runat="server"></asp:TextBox>
                <asp:Button CssClass="btn btn-primary" ID="btnNext" runat="server" Text="Next" OnClick="btnNext_Click" />
                <!-- Client-side zoom buttons, handled by the jQuery script in the head. -->
                <span class="text-center">
                    <input class="btn btn-primary" type="button" value="+" id="zoom-in" />
                    <input class="btn btn-primary" type="button" value="-" id="zoom-out" />
                </span>
            </div>
            <!-- Container that is scaled by the zoom buttons; the literal holds the HTML of the current PDF page. -->
            <div id="page-window">
                <asp:Literal ID="htmlLiteral" runat="server" />
            </div>
        </form>
    </body>
</html>
Imports System
Imports System.Data
Imports System.Configuration
Imports System.Web
Imports System.Web.Security
Imports System.Web.UI
Imports System.Web.UI.WebControls
Imports System.Web.UI.WebControls.WebParts
Imports System.Web.UI.HtmlControls
Imports System.IO
Imports SautinSoft

''' <summary>
''' Code-behind of the PDF viewer page. An uploaded PDF is opened with PdfFocus,
''' kept in the session, and rendered to HTML one page at a time.
''' </summary>
Partial Public Class _Default
    Inherits System.Web.UI.Page

    ''' <summary>Session key of the 1-based number of the page currently shown.</summary>
    Private Const PageSessionKey As String = "page"

    ''' <summary>Session key of the PdfFocus instance that holds the uploaded document.</summary>
    Private Const FocusSessionKey As String = "focus"

    ''' <summary>
    ''' Hooks the file input to the page's client-side UploadFile script and,
    ''' on the first (non-postback) request, starts at page 1.
    ''' </summary>
    Protected Sub Page_Load(ByVal sender As Object, ByVal e As EventArgs)
        uplPDF.Attributes("onchange") = "UploadFile(this)"
        If Not Me.IsPostBack Then
            Session(PageSessionKey) = 1
        End If
    End Sub

    ''' <summary>Shows the next page, if there is one.</summary>
    Protected Sub btnNext_Click(ByVal sender As Object, ByVal e As EventArgs)
        MoveToPage(GetCurrentPage() + 1)
    End Sub

    ''' <summary>Shows the previous page, if there is one.</summary>
    Protected Sub btnPrev_Click(ByVal sender As Object, ByVal e As EventArgs)
        MoveToPage(GetCurrentPage() - 1)
    End Sub

    ''' <summary>
    ''' Opens the posted PDF, stores it in the session and shows its first page.
    ''' </summary>
    Protected Sub Upload(ByVal sender As Object, ByVal e As EventArgs)
        ' Nothing was posted: do not claim success and keep whatever is currently displayed.
        If Not uplPDF.HasFile Then
            lblMessage.Visible = False
            Return
        End If

        Dim f As New PdfFocus()
        f.OpenPdf(uplPDF.FileBytes)
        Session(FocusSessionKey) = f

        ' A new document always starts at page 1; the page number left over from a
        ' previously viewed document may be larger than this document's page count.
        Session(PageSessionKey) = 1

        lblMessage.Visible = True
        ShowPdf()
    End Sub

    ''' <summary>
    ''' Tells whether the given 1-based page number is valid for the document in the session.
    ''' </summary>
    ''' <param name="page">1-based page number to check.</param>
    ''' <returns>True when a document is stored and the page lies within 1..PageCount; otherwise False.</returns>
    Protected Function IsPageInRange(ByVal page As Integer) As Boolean
        If Session(FocusSessionKey) IsNot Nothing Then
            Dim f As PdfFocus = CType(Session(FocusSessionKey), PdfFocus)
            If page > 0 AndAlso page <= f.PageCount Then
                Return True
            End If
        End If
        Return False
    End Function

    ''' <summary>
    ''' Reads the current page number from the session, falling back to 1 when the
    ''' entry is missing (for example after the session has expired).
    ''' </summary>
    Private Function GetCurrentPage() As Integer
        Dim stored As Object = Session(PageSessionKey)
        If TypeOf stored Is Integer Then
            Return CInt(stored)
        End If
        Return 1
    End Function

    ''' <summary>Stores the page as the current page and renders it, but only when it is in range.</summary>
    Private Sub MoveToPage(ByVal page As Integer)
        If IsPageInRange(page) Then
            Session(PageSessionKey) = page
            ShowPdf()
        End If
    End Sub

    ''' <summary>
    ''' Converts the current page of the stored document to HTML (images embedded as PNG)
    ''' and updates the literal and the "Page x of y" text box.
    ''' </summary>
    Private Sub ShowPdf()
        Dim f As PdfFocus = TryCast(Session(FocusSessionKey), PdfFocus)
        If f Is Nothing OrElse f.PageCount <= 0 Then
            Return
        End If

        f.HtmlOptions.IncludeImageInHtml = True
        f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Png

        Dim page As Integer = GetCurrentPage()
        If page < 1 OrElse page > f.PageCount Then
            ' Defensive reset: never ask the converter for a page the document does not have.
            page = 1
            Session(PageSessionKey) = page
        End If

        Dim html As String = f.ToHtml(page, page)
        htmlLiteral.Text = html
        txtPage.Text = String.Format("Page {0} of {1}", page, f.PageCount)
    End Sub
End Class
<%@ Page Language="VB" AutoEventWireup="true" CodeFile="Default.aspx.vb" Inherits="_Default" %>

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
    <head runat="server">
        <title></title>
        <link rel="stylesheet" href="/lib/bootstrap/dist/css/bootstrap.css" />
        <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
        <script>
        // Wires the "+" and "-" buttons: each click changes the CSS scale of #page-window
        // by one step and shifts its left/top margin by a matching amount.
        $(document).ready(function () {

            var zoom = 1.0;
            var zoomStep = 0.05;
            var margin = 0;
            var marginStep = 30;
            var pageWindow = $('#page-window');

            // Applies one zoom step; direction is +1 (zoom in) or -1 (zoom out).
            function applyZoomStep(direction) {
                var nextZoom = zoom + direction * zoomStep;

                // Do not shrink to a zero or negative scale: that would collapse or mirror the page.
                // Half a step is used as the threshold to absorb floating-point drift.
                if (nextZoom < zoomStep / 2) {
                    return;
                }

                zoom = nextZoom;
                margin += direction * marginStep;
                pageWindow.css({
                    transform: 'scale(' + zoom + ')',
                    "margin-left": margin,
                    "margin-top": margin
                });
            }

            $('#zoom-in').click(function () {
                applyZoomStep(1);
            });
            $('#zoom-out').click(function () {
                applyZoomStep(-1);
            });
        });
        </script>
        <script type="text/javascript">
        // Invoked by the file input's onchange attribute. When a file has been picked,
        // it clicks the hidden server-side upload button to post the form.
        function UploadFile(fileUpload) {
            var nothingSelected = (fileUpload.value == '');
            if (nothingSelected) {
                return;
            }
            var uploadButton = document.getElementById("<%=btnUpload.ClientID %>");
            uploadButton.click();
        }
        </script>
    </head>
    <body>
        <!-- Single server-side form: upload control, page navigation, zoom buttons and the rendered page. -->
        <form class="panel-body" id="form1" runat="server">
            <div class="panel">
                <!-- File picker; the code-behind attaches an onchange handler that calls UploadFile(this). -->
                <asp:FileUpload ID="uplPDF" runat="server" ToolTip="Please select a PDF document." /><br />
                <!-- Status message, hidden until the Upload handler makes it visible. -->
                <asp:Label ID="lblMessage" runat="server" Text="File uploaded successfully." ForeColor="Green" Visible="false" />
                <!-- Hidden button (display: none) that the UploadFile script clicks to post the selected file. -->
                <asp:Button CssClass="btn btn-primary" ID="btnUpload" Text="Upload" runat="server" OnClick="Upload" Style="display: none" />
                <!-- Page navigation; txtPage receives the "Page x of y" text from the code-behind. -->
                <asp:Button CssClass="btn btn-primary" ID="btnPrev" runat="server" Text="Prev" OnClick="btnPrev_Click" />
                <asp:TextBox ID="txtPage" runat="server"></asp:TextBox>
                <asp:Button CssClass="btn btn-primary" ID="btnNext" runat="server" Text="Next" OnClick="btnNext_Click" />
                <!-- Client-side zoom buttons, handled by the jQuery script in the head. -->
                <span class="text-center">
                    <input class="btn btn-primary" type="button" value="+" id="zoom-in" />
                    <input class="btn btn-primary" type="button" value="-" id="zoom-out" />
                </span>
            </div>
            <!-- Container that is scaled by the zoom buttons; the literal holds the HTML of the current PDF page. -->
            <div id="page-window">
                <asp:Literal ID="htmlLiteral" runat="server" />
            </div>
        </form>
    </body>
</html>

Конвертирование нескольких PDF-файлов в HTML

using System;
using System.IO;
using System.Linq;
using System.Text;
using SautinSoft;

namespace Sample
{
    /// <summary>
    /// Console sample that converts every *.pdf file of a directory either to
    /// separate HTML files or to one merged HTML document.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Entry point. Runs the "one HTML per PDF" conversion; swap the comment
        /// to run the "single merged HTML" conversion instead.
        /// </summary>
        static void Main(string[] args)
        {
            ConvertMultiplePdfToHtmls();
            //ConvertMultiplePdfToSingleHtml();
        }

        /// <summary>
        /// Converts multiple PDF files to HTML files.
        /// Results are written to the "htmls" directory, which is opened afterwards.
        /// </summary>
        static void ConvertMultiplePdfToHtmls()
        {
            // Directory with *.pdf files.
            string pdfDirectory = Path.GetFullPath(@"..\..\");
            string[] pdfFiles = Directory.GetFiles(pdfDirectory, "*.pdf");

            DirectoryInfo htmlDirectory = new DirectoryInfo(@"htmls");
            if (!htmlDirectory.Exists)
                htmlDirectory.Create();

            PdfFocus f = new PdfFocus();
            // After purchasing the license, please insert your serial number here to activate the component:
            //f.Serial = "XXXXXXXXXXX";
            int success = 0;
            int total = 0;

            foreach (string pdfFile in pdfFiles)
            {
                Console.WriteLine("Converting {0} ...", Path.GetFileName(pdfFile));
                f.OpenPdf(pdfFile);
                total++;
                try
                {
                    if (f.PageCount > 0)
                    {
                        // Path (must exist) to a directory to store images after converting. Notice also to the property "ImageSubFolder".
                        f.HtmlOptions.ImageFolder = htmlDirectory.FullName;

                        // A folder (will be created by the component) without any drive letters, only the folder as "myfolder".
                        f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));

                        // A template name for images
                        f.HtmlOptions.ImageFileName = "picture";

                        // Auto - the same image format as in the source PDF;
                        // 'Jpeg' to make the document size less;
                        // 'PNG' to keep the highest quality, but the highest size too.
                        f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto;

                        // How to store images: Inside HTML document as base64 images or as linked separate image files.
                        f.HtmlOptions.IncludeImageInHtml = false;

                        string htmlFile = Path.GetFileNameWithoutExtension(pdfFile) + ".html";
                        string htmlFilePath = Path.Combine(htmlDirectory.FullName, htmlFile);
                        if (f.ToHtml(htmlFilePath) == 0)
                        {
                            success++;
                        }
                    }
                }
                finally
                {
                    // The same PdfFocus instance is reused for every file, so release the
                    // current document before the next one is opened.
                    f.ClosePdf();
                }
            }
            // Show results:
            Console.WriteLine("{0} of {1} files converted successfully!", success, total);

            // Open folder with HTML files after converting.
            // Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlDirectory.FullName) { UseShellExecute = true });
        }

        /// <summary>
        /// Converts multiple PDF files into a single HTML document.
        /// The merged document is written to "Result.html", which is opened afterwards.
        /// </summary>
        static void ConvertMultiplePdfToSingleHtml()
        {
            // Directory with *.pdf files.
            string pdfDirectory = Path.GetFullPath(@"..\..\");
            string htmlFile = "Result.html";
            string[] pdfFiles = Directory.GetFiles(pdfDirectory, "*.pdf");

            // Here we'll keep our Html document.
            StringBuilder singleHtml = new StringBuilder();
            singleHtml.Append("<html>\r\n<head>\r\n");
            singleHtml.Append(@"<meta http-equiv = ""Content-Type"" content=""text/html; charset=utf-8"" />");
            singleHtml.Append("\r\n</head>\r\n<body>");

            PdfFocus f = new PdfFocus();
            //f.Serial = "XXXXXXXXXXX";
            int success = 0;
            int total = 0;

            foreach (string pdfFile in pdfFiles)
            {
                Console.WriteLine("Converting {0} ...", Path.GetFileName(pdfFile));
                f.OpenPdf(pdfFile);
                total++;
                try
                {
                    if (f.PageCount > 0)
                    {
                        // How to store images: Inside HTML document as base64 images or as linked separate image files.
                        f.HtmlOptions.IncludeImageInHtml = false;

                        // Create own subfolder for each converted file to store images separately and don't mix up them.
                        f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));

                        // A template name for images
                        f.HtmlOptions.ImageFileName = "picture";

                        // Auto - the same image format as in the source PDF;
                        // 'Jpeg' to make the document size less;
                        // 'PNG' to keep the highest quality, but the highest size too.
                        f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto;

                        // Let's make our CSS inline to be able merge HTML documents without any problems.
                        f.HtmlOptions.InlineCSS = true;

                        // We need only contents of <body>...</body>.
                        f.HtmlOptions.ProduceOnlyHtmlBody = true;
                        string tempHtml = f.ToHtml();
                        if (!String.IsNullOrEmpty(tempHtml))
                        {
                            success++;
                            // Add tempHtml into a single HTML.
                            singleHtml.Append(tempHtml);
                        }
                    }
                }
                finally
                {
                    // The same PdfFocus instance is reused for every file, so release the
                    // current document before the next one is opened.
                    f.ClosePdf();
                }
            }
            singleHtml.Append("</body></html>");

            // Show results:
            File.WriteAllText(htmlFile, singleHtml.ToString());
            Console.WriteLine("{0} of {1} files converted and merged into {2}!", success, total, Path.GetFileName(htmlFile));

            // Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlFile) { UseShellExecute = true });
        }
    }
}
Imports Microsoft.VisualBasic
Imports System
Imports System.IO
Imports System.Linq
Imports System.Text
Imports SautinSoft

Namespace Sample
    ''' <summary>
    ''' Console sample that converts every *.pdf file of a directory either to
    ''' separate HTML files or to one merged HTML document.
    ''' </summary>
    Friend Class Sample
        ''' <summary>
        ''' Entry point. Runs the "single merged HTML" conversion; swap the comment
        ''' to run the "one HTML per PDF" conversion instead.
        ''' </summary>
        Shared Sub Main(ByVal args() As String)
            'ConvertMultiplePdfToHtmls()
            ConvertMultiplePdfToSingleHtml()
        End Sub

        ''' <summary>
        ''' Converts multiple PDF files to HTML files.
        ''' Results are written to the "htmls" directory, which is opened afterwards.
        ''' </summary>
        Private Shared Sub ConvertMultiplePdfToHtmls()
            ' Directory with *.pdf files.
            Dim pdfDirectory As String = Path.GetFullPath("..\")
            Dim pdfFiles() As String = Directory.GetFiles(pdfDirectory, "*.pdf")
            Dim htmlDirectory As New DirectoryInfo("htmls")
            If Not htmlDirectory.Exists Then
                htmlDirectory.Create()
            End If

            Dim f As New PdfFocus()
            ' After purchasing the license, please insert your serial number here to activate the component:
            'f.Serial = "XXXXXXXXXXX";

            Dim success As Integer = 0
            Dim total As Integer = 0

            For Each pdfFile As String In pdfFiles
                Console.WriteLine("Converting {0} ...", Path.GetFileName(pdfFile))
                f.OpenPdf(pdfFile)
                total += 1
                Try
                    If f.PageCount > 0 Then
                        ' Path (must exist) to a directory to store images after converting. Notice also to the property "ImageSubFolder".
                        f.HtmlOptions.ImageFolder = htmlDirectory.FullName

                        ' A folder (will be created by the component) without any drive letters, only the folder as "myfolder".
                        f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile))

                        ' A template name for images
                        f.HtmlOptions.ImageFileName = "picture"

                        ' Auto - the same image format as in the source PDF;
                        ' 'Jpeg' to make the document size less;
                        ' 'PNG' to keep the highest quality, but the highest size too.
                        f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto

                        ' How to store images: Inside HTML document as base64 images or as linked separate image files.
                        f.HtmlOptions.IncludeImageInHtml = False

                        Dim htmlFile As String = Path.GetFileNameWithoutExtension(pdfFile) & ".html"
                        Dim htmlFilePath As String = Path.Combine(htmlDirectory.FullName, htmlFile)
                        If f.ToHtml(htmlFilePath) = 0 Then
                            success += 1
                        End If
                    End If
                Finally
                    ' The same PdfFocus instance is reused for every file, so release the
                    ' current document before the next one is opened.
                    f.ClosePdf()
                End Try
            Next pdfFile
            ' Show results:
            Console.WriteLine("{0} of {1} files converted successfully!", success, total)

            ' Open folder with HTML files after converting.
            ' Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(htmlDirectory.FullName) With {.UseShellExecute = True})
        End Sub

        ''' <summary>
        ''' Converts multiple PDF files into a single HTML document.
        ''' The merged document is written to "Result.html", which is opened afterwards.
        ''' </summary>
        Private Shared Sub ConvertMultiplePdfToSingleHtml()
            ' Directory with *.pdf files.
            Dim pdfDirectory As String = Path.GetFullPath("..\")
            Dim htmlFile As String = "Result.html"
            Dim pdfFiles() As String = Directory.GetFiles(pdfDirectory, "*.pdf")

            ' Here we'll keep our Html document.
            Dim singleHtml As New StringBuilder()
            singleHtml.Append("<html>" & vbCrLf & "<head>" & vbCrLf)
            singleHtml.Append("<meta http-equiv = ""Content-Type"" content=""text/html; charset=utf-8"" />")
            singleHtml.Append(vbCrLf & "</head>" & vbCrLf & "<body>")

            Dim f As New PdfFocus()
            'f.Serial = "XXXXXXXXXXX";

            Dim success As Integer = 0
            Dim total As Integer = 0

            For Each pdfFile As String In pdfFiles
                Console.WriteLine("Converting {0} ...", Path.GetFileName(pdfFile))
                f.OpenPdf(pdfFile)
                total += 1
                Try
                    If f.PageCount > 0 Then
                        ' How to store images: Inside HTML document as base64 images or as linked separate image files.
                        f.HtmlOptions.IncludeImageInHtml = False

                        ' Create own subfolder for each converted file to store images separately and don't mix up them.
                        f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile))

                        ' A template name for images
                        f.HtmlOptions.ImageFileName = "picture"

                        ' Auto - the same image format as in the source PDF;
                        ' 'Jpeg' to make the document size less;
                        ' 'PNG' to keep the highest quality, but the highest size too.
                        f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto

                        ' Let's make our CSS inline to be able merge HTML documents without any problems.
                        f.HtmlOptions.InlineCSS = True

                        ' We need only contents of <body>...</body>.
                        f.HtmlOptions.ProduceOnlyHtmlBody = True

                        Dim tempHtml As String = f.ToHtml()
                        If Not String.IsNullOrEmpty(tempHtml) Then
                            success += 1
                            ' Add tempHtml into a single HTML.
                            singleHtml.Append(tempHtml)
                        End If
                    End If
                Finally
                    ' The same PdfFocus instance is reused for every file, so release the
                    ' current document before the next one is opened.
                    f.ClosePdf()
                End Try
            Next pdfFile
            singleHtml.Append("</body></html>")

            ' Show results:
            File.WriteAllText(htmlFile, singleHtml.ToString())
            Console.WriteLine("{0} of {1} files converted and merged into {2}!", success, total, Path.GetFileName(htmlFile))

            ' Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(htmlFile) With {.UseShellExecute = True})
        End Sub
    End Class
End Namespace

Конвертирование PDF в HTML

using System;
using System.IO;
using SautinSoft;

namespace Sample
{
    /// <summary>
    /// Console sample that converts one PDF file to an HTML file with linked images.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Converts "simple text.pdf" to "Result.html" and opens the result when the conversion succeeds.
        /// </summary>
        static void Main(string[] args)
        {
            string pdfFile = @"..\..\simple text.pdf";
            string htmlFile = "Result.html";

            // Convert PDF file to HTML file
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // After purchasing the license, please insert your serial number here to activate the component:
            //f.Serial = "XXXXXXXXXXX";

            // Path (must exist) to a directory to store images after converting. Notice also to the property "ImageSubFolder".
            // The HTML path is made absolute first: Path.GetDirectoryName returns an empty string
            // for a bare file name such as "Result.html", which is not an existing directory path.
            f.HtmlOptions.ImageFolder = Path.GetDirectoryName(Path.GetFullPath(htmlFile));

            // A folder (will be created by the component) without any drive letters, only the folder as "myfolder".
            f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));

            // Auto - the same image format as in the source PDF;
            // 'Jpeg' to make the document size less;
            // 'PNG' to keep the highest quality, but the highest size too.
            f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto;

            // How to store images: Inside HTML document as base64 images or as linked separate image files.
            f.HtmlOptions.IncludeImageInHtml = false;

            // Set <title>...</title>
            f.HtmlOptions.Title = String.Format("This HTML was converted from {0}.", Path.GetFileName(pdfFile));
            f.OpenPdf(pdfFile);
            if (f.PageCount > 0)
            {
                int res = f.ToHtml(htmlFile);
                // Open the result for demonstration purposes.
                if (res == 0)
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlFile) { UseShellExecute = true });
            }
        }
    }
}
Imports System
Imports System.IO
Imports SautinSoft

Namespace Sample
    ''' <summary>
    ''' Console sample that converts one PDF file to an HTML file with linked images.
    ''' </summary>
    Friend Class Sample
        ''' <summary>
        ''' Converts "simple text.pdf" to "Result.html" and opens the result when the conversion succeeds.
        ''' </summary>
        Shared Sub Main(ByVal args() As String)
            Dim pdfFile As String = "..\simple text.pdf"
            Dim htmlFile As String = "Result.html"

            ' Convert PDF file to HTML file
            Dim f As New SautinSoft.PdfFocus()

            ' After purchasing the license, please insert your serial number here to activate the component:
            'f.Serial = "XXXXXXXXXXX";

            ' Path (must exist) to a directory to store images after converting. Notice also to the property "ImageSubFolder".
            ' The HTML path is made absolute first: Path.GetDirectoryName returns an empty string
            ' for a bare file name such as "Result.html", which is not an existing directory path.
            f.HtmlOptions.ImageFolder = Path.GetDirectoryName(Path.GetFullPath(htmlFile))

            ' A folder (will be created by the component) without any drive letters, only the folder as "myfolder".
            f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile))

            ' Auto - the same image format as in the source PDF;
            ' 'Jpeg' to make the document size less;
            ' 'PNG' to keep the highest quality, but the highest size too.
            f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto

            ' How to store images: Inside HTML document as base64 images or as linked separate image files.
            f.HtmlOptions.IncludeImageInHtml = False

            ' Set <title>...</title>
            f.HtmlOptions.Title = String.Format("This HTML was converted from {0}.", Path.GetFileName(pdfFile))
            f.OpenPdf(pdfFile)
            If f.PageCount > 0 Then
                Dim res As Integer = f.ToHtml(htmlFile)

                ' Open the result for demonstration purposes.
                If res = 0 Then
                    System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(htmlFile) With {.UseShellExecute = True})
                End If
            End If
        End Sub
    End Class
End Namespace

Конвертирование PDF в HTML в памяти

using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample showing PDF to HTML conversion where the PDF is supplied
    /// as a byte array or as a stream instead of a file path.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Entry point. Runs the byte-array variant; swap the comment to run the stream variant.
        /// </summary>
        static void Main(string[] args)
        {
            ConvertPdfBytesToHtml();
            //ConvertPdfStreamToHtml();
        }

        /// <summary>
        /// Loads the PDF into a byte array, converts it to an HTML string,
        /// then saves and opens the result for demonstration.
        /// </summary>
        private static void ConvertPdfBytesToHtml()
        {
            // Files are involved only to feed the demo and to display its outcome.
            string sourcePdfPath = @"..\..\simple text.pdf";
            string resultHtmlPath = "Result.html";

            SautinSoft.PdfFocus converter = new SautinSoft.PdfFocus();
            // Needed only for the licensed version.
            //converter.Serial = "XXXXXXXXXXX";

            // Images are embedded into the HTML as base-64, so no image files are written.
            converter.HtmlOptions.IncludeImageInHtml = true;
            converter.HtmlOptions.Title = "Simple text";

            // Stand-in for a PDF that is already available as bytes.
            byte[] pdfBytes = File.ReadAllBytes(sourcePdfPath);
            converter.OpenPdf(pdfBytes);
            if (converter.PageCount <= 0)
            {
                return;
            }

            string markup = converter.ToHtml();
            if (markup == null)
            {
                return;
            }

            // Persist and open the HTML purely to show the outcome.
            File.WriteAllText(resultHtmlPath, markup);
            System.Diagnostics.ProcessStartInfo launchInfo = new System.Diagnostics.ProcessStartInfo(resultHtmlPath);
            launchInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(launchInfo);
        }

        /// <summary>
        /// Opens the PDF from a stream, converts it into a MemoryStream,
        /// then saves and opens the result for demonstration.
        /// </summary>
        private static void ConvertPdfStreamToHtml()
        {
            // Files are involved only to feed the demo and to display its outcome.
            string sourcePdfPath = @"..\..\simple text.pdf";
            string resultHtmlPath = "Result.html";

            SautinSoft.PdfFocus converter = new SautinSoft.PdfFocus();
            // Needed only for the licensed version.
            //converter.Serial = "XXXXXXXXXXX";

            // Images are embedded into the HTML as base-64, so no image files are written.
            converter.HtmlOptions.IncludeImageInHtml = true;
            converter.HtmlOptions.Title = "Simple text";

            // Stand-in for a PDF that is already available as a Stream.
            using (FileStream pdfStream = File.OpenRead(sourcePdfPath))
            {
                converter.OpenPdf(pdfStream);
                if (converter.PageCount <= 0)
                {
                    return;
                }

                using (MemoryStream htmlStream = new MemoryStream())
                {
                    int status = converter.ToHtml(htmlStream);
                    if (status != 0)
                    {
                        return;
                    }

                    // Persist and open the HTML purely to show the outcome.
                    File.WriteAllBytes(resultHtmlPath, htmlStream.ToArray());
                    System.Diagnostics.ProcessStartInfo launchInfo = new System.Diagnostics.ProcessStartInfo(resultHtmlPath);
                    launchInfo.UseShellExecute = true;
                    System.Diagnostics.Process.Start(launchInfo);
                }
            }
        }
    }
}
Imports System
Imports System.IO

Namespace Sample
    ''' <summary>
    ''' Console sample showing PDF to HTML conversion where the PDF is supplied
    ''' as a byte array or as a stream instead of a file path.
    ''' </summary>
    Friend Class Sample
        ''' <summary>
        ''' Entry point. Runs the byte-array variant; swap the comment to run the stream variant.
        ''' </summary>
        Shared Sub Main(ByVal args() As String)
            ConvertPdfBytesToHtml()
            'ConvertPdfStreamToHtml()
        End Sub

        ''' <summary>
        ''' Loads the PDF into a byte array, converts it to an HTML string,
        ''' then saves and opens the result for demonstration.
        ''' </summary>
        Private Shared Sub ConvertPdfBytesToHtml()
            ' Files are involved only to feed the demo and to display its outcome.
            Dim sourcePdfPath As String = "..\simple text.pdf"
            Dim resultHtmlPath As String = "Result.html"

            Dim converter As New SautinSoft.PdfFocus()
            ' Needed only for the licensed version.
            'converter.Serial = "XXXXXXXXXXX";

            ' Images are embedded into the HTML as base-64, so no image files are written.
            converter.HtmlOptions.IncludeImageInHtml = True
            converter.HtmlOptions.Title = "Simple text"

            ' Stand-in for a PDF that is already available as bytes.
            Dim pdfBytes() As Byte = File.ReadAllBytes(sourcePdfPath)
            converter.OpenPdf(pdfBytes)
            If converter.PageCount <= 0 Then
                Return
            End If

            Dim markup As String = converter.ToHtml()
            If markup Is Nothing Then
                Return
            End If

            ' Persist and open the HTML purely to show the outcome.
            File.WriteAllText(resultHtmlPath, markup)
            Dim launchInfo As New System.Diagnostics.ProcessStartInfo(resultHtmlPath)
            launchInfo.UseShellExecute = True
            System.Diagnostics.Process.Start(launchInfo)
        End Sub

        ''' <summary>
        ''' Opens the PDF from a stream, converts it into a MemoryStream,
        ''' then saves and opens the result for demonstration.
        ''' </summary>
        Private Shared Sub ConvertPdfStreamToHtml()
            ' Files are involved only to feed the demo and to display its outcome.
            Dim sourcePdfPath As String = "..\simple text.pdf"
            Dim resultHtmlPath As String = "Result.html"

            Dim converter As New SautinSoft.PdfFocus()
            ' Needed only for the licensed version.
            'converter.Serial = "XXXXXXXXXXX";

            ' Images are embedded into the HTML as base-64, so no image files are written.
            converter.HtmlOptions.IncludeImageInHtml = True
            converter.HtmlOptions.Title = "Simple text"

            ' Stand-in for a PDF that is already available as a Stream.
            Using pdfStream As FileStream = File.OpenRead(sourcePdfPath)
                converter.OpenPdf(pdfStream)
                If converter.PageCount <= 0 Then
                    Return
                End If

                Using htmlStream As New MemoryStream()
                    Dim status As Integer = converter.ToHtml(htmlStream)
                    If status <> 0 Then
                        Return
                    End If

                    ' Persist and open the HTML purely to show the outcome.
                    File.WriteAllBytes(resultHtmlPath, htmlStream.ToArray())
                    Dim launchInfo As New System.Diagnostics.ProcessStartInfo(resultHtmlPath)
                    launchInfo.UseShellExecute = True
                    System.Diagnostics.Process.Start(launchInfo)
                End Using
            End Using
        End Sub
    End Class
End Namespace

Конвертирование PDF в HTML в памяти и получение списка со всеми изображениями

using System;
using System.IO;
using System.Collections.Generic;
using System.Drawing;
namespace Sample
{
    /// <summary>
    /// Sample: convert a PDF held in memory to HTML and collect the images
    /// that the converter extracts along the way.
    /// </summary>
    class Sample
    {
        static void Main(string[] args)
        {
            ConvertPdfBytesToHtml();
        }

        /// <summary>
        /// Reads a PDF into a byte array, converts all of its pages to an HTML string,
        /// saves every extracted image as a JPEG file and opens the results.
        /// </summary>
        private static void ConvertPdfBytesToHtml()
        {
            // We need files only for demonstration purposes.
            // The whole conversion process will be done in memory.
            string pdfFile = @"..\..\simple text.pdf";
            string htmlFile = "Result.htm";

            // This is the list with extracted images.
            // It will be filled by images after the conversion.
            List<Image> imgCollection = new List<Image>();

            // Convert PDF to HTML in memory
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            // This property is necessary only for licensed version.
            //f.Serial = "XXXXXXXXXXX";

            // Let's force the component to store images inside HTML document
            // using base-64 encoding.
            // Thus the component will not use HDD.
            f.HtmlOptions.IncludeImageInHtml = true;
            f.HtmlOptions.Title = "Simple text";

            // Read a PDF document to byte array.
            // Assume that we already have the PDF as array of bytes.
            byte[] pdf = File.ReadAllBytes(pdfFile);
            f.OpenPdf(pdf);
            if (f.PageCount > 0)
            {
                // Convert all pages (1..PageCount) to HTML in memory.
                string htmlString = f.ToHtml(1, f.PageCount, imgCollection);

                // Continue only when the conversion returned a string.
                if (htmlString != null)
                {
                    // Show info about images and save them
                    Console.WriteLine("After converting we've got {0} image(s):", imgCollection.Count);
                    DirectoryInfo imgDir = new DirectoryInfo("Extracted Images");
                    if (!imgDir.Exists)
                        imgDir.Create();

                    int count = 1;
                    foreach (Image img in imgCollection)
                    {
                        // Image is IDisposable: release it as soon as it has been
                        // written to disk, it is not used afterwards.
                        using (img)
                        {
                            Console.WriteLine("\t {0,4} x {1,4} px", img.Width, img.Height);
                            // The interpolated string is already the final name;
                            // it must not be passed through String.Format again.
                            string imageFileName = Path.Combine(imgDir.FullName, $"pict{count}.jpg");
                            img.Save(imageFileName, System.Drawing.Imaging.ImageFormat.Jpeg);
                        }
                        count++;
                    }

                    // Save HTML to a file and open the results only for the demonstration purpose.
                    File.WriteAllText(htmlFile, htmlString);
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlFile) { UseShellExecute = true });
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(imgDir.FullName) { UseShellExecute = true });
                }
            }
        }
    }
}
Imports Microsoft.VisualBasic
Imports System
Imports System.IO
Imports System.Collections.Generic
Imports System.Drawing

Namespace Sample
    ''' <summary>
    ''' Sample: convert a PDF held in memory to HTML and collect the images
    ''' that the converter extracts along the way.
    ''' </summary>
    Friend Class Sample
        Shared Sub Main(ByVal args() As String)
            ConvertPdfBytesToHtml()
        End Sub

        ''' <summary>
        ''' Reads a PDF into a byte array, converts all of its pages to an HTML string,
        ''' saves every extracted image as a JPEG file and opens the results.
        ''' </summary>
        Private Shared Sub ConvertPdfBytesToHtml()
            ' We need files only for demonstration purposes.
            ' The whole conversion process will be done in memory.
            Dim pdfFile As String = "..\simple text.pdf"
            Dim htmlFile As String = "Result.htm"

            ' This is the list with extracted images.
            ' It will be filled by images after the conversion.
            Dim imgCollection As New List(Of Image)()

            ' Convert PDF to HTML in memory
            Dim f As New SautinSoft.PdfFocus()

            ' This property is necessary only for licensed version.
            'f.Serial = "XXXXXXXXXXX"

            ' Let's force the component to store images inside HTML document
            ' using base-64 encoding.
            ' Thus the component will not use HDD.
            f.HtmlOptions.IncludeImageInHtml = True
            f.HtmlOptions.Title = "Simple text"

            ' Read a PDF document to byte array.
            ' Assume that we already have the PDF as array of bytes.
            Dim pdf() As Byte = File.ReadAllBytes(pdfFile)
            f.OpenPdf(pdf)
            If f.PageCount > 0 Then
                ' Convert all pages (1..PageCount) to HTML in memory.
                Dim htmlString As String = f.ToHtml(1, f.PageCount, imgCollection)

                ' Continue only when the conversion returned a string.
                If htmlString IsNot Nothing Then
                    ' Show info about images and save them
                    Console.WriteLine("After converting we've got {0} image(s):", imgCollection.Count)
                    Dim imgDir As New DirectoryInfo("Extracted Images")
                    If Not imgDir.Exists Then
                        imgDir.Create()
                    End If

                    Dim count As Integer = 1
                    For Each img As Image In imgCollection
                        Try
                            Console.WriteLine(vbTab & " {0,4} x {1,4} px", img.Width, img.Height)
                            ' The interpolated string is already the final name;
                            ' it must not be passed through String.Format again.
                            Dim imageFileName As String = Path.Combine(imgDir.FullName, $"pict{count}.jpg")
                            img.Save(imageFileName, System.Drawing.Imaging.ImageFormat.Jpeg)
                        Finally
                            ' Image is IDisposable: release it as soon as it has been
                            ' written to disk, it is not used afterwards.
                            img.Dispose()
                        End Try
                        count += 1
                    Next img

                    ' Save HTML to a file and open the results only for the demonstration purpose.
                    File.WriteAllText(htmlFile, htmlString)
                    System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(htmlFile) With {.UseShellExecute = True})
                    System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(imgDir.FullName) With {.UseShellExecute = True})
                End If
            End If
        End Sub
    End Class
End Namespace

Конвертирование PDF в HTML в многопоточном режиме

using System;
using System.IO;
using System.Collections.Generic;
using System.Threading;
using SautinSoft;

namespace Sample
{
    /// <summary>
    /// Sample: convert every PDF found in a directory to HTML, one thread per file.
    /// </summary>
    class Sample
    {
        static void Main(string[] args)
        {
            ConvertPdfToHtmlInThread();
        }

        /// <summary>
        /// Work item handed to a conversion thread.
        /// </summary>
        public class TArgument
        {
            /// <summary>Path of the PDF file to convert.</summary>
            public string PdfFile { get; set; }

            /// <summary>Path of the HTML file to produce.</summary>
            public string HtmlFile { get; set; }

            /// <summary>1-based number of the page to convert.</summary>
            public int PageNumber { get; set; }
        }

        /// <summary>
        /// Starts one thread per "*.pdf" file in the source directory, waits for all
        /// of them to finish and then opens the output directory.
        /// </summary>
        public static void ConvertPdfToHtmlInThread()
        {
            string pdfDir = @"..\..\";
            string[] pdfFiles = Directory.GetFiles(pdfDir, "*.pdf");
            DirectoryInfo htmlDir = new DirectoryInfo("HTML results");
            if (!htmlDir.Exists)
                htmlDir.Create();

            List<Thread> threads = new List<Thread>();
            foreach (string pdfFile in pdfFiles)
            {
                TArgument targ = new TArgument()
                {
                    PdfFile = pdfFile,
                    HtmlFile = Path.Combine(htmlDir.FullName, Path.GetFileNameWithoutExtension(pdfFile) + ".html"),
                    PageNumber = 1
                };
                // ConvertToHtml(object) matches ParameterizedThreadStart directly.
                var t = new Thread(ConvertToHtml);
                t.Start(targ);
                threads.Add(t);
            }

            // Wait until every conversion has finished before reporting.
            foreach (var thread in threads)
                thread.Join();
            Console.WriteLine("Done.");
            // Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlDir.FullName) { UseShellExecute = true });
        }

        /// <summary>
        /// Thread entry point: converts a single page of one PDF file to HTML and
        /// prints "Done!" or "Error!" for that file.
        /// </summary>
        /// <param name="targ">A <see cref="TArgument"/> describing the work item.</param>
        public static void ConvertToHtml(object targ)
        {
            TArgument targum = (TArgument)targ;
            string pdfFile = targum.PdfFile;
            int page = targum.PageNumber;
            string htmlFile = targum.HtmlFile;

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto;
            f.HtmlOptions.IncludeImageInHtml = false;
            f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));
            f.HtmlOptions.Title = String.Format("This document was produced from {0}.", Path.GetFileName(pdfFile));
            f.HtmlOptions.ImageFileName = "picture";

            f.OpenPdf(pdfFile);
            bool done = false;
            if (f.PageCount > 0)
            {
                // Pages are numbered 1..PageCount, so the last page is valid.
                // Fall back to the first page only when the request is out of range.
                if (page < 1 || page > f.PageCount)
                    page = 1;
                if (f.ToHtml(htmlFile, page, page) == 0)
                    done = true;
                f.ClosePdf();
            }
            if (done)
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
            else
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
        }
    }
}
Option Infer On

Imports Microsoft.VisualBasic
Imports System
Imports System.IO
Imports System.Collections.Generic
Imports System.Threading
Imports SautinSoft

Namespace Sample
    ''' <summary>
    ''' Sample: convert every PDF found in a directory to HTML, one thread per file.
    ''' </summary>
    Friend Class Sample
        Shared Sub Main(ByVal args() As String)
            ConvertPdfToHtmlInThread()
        End Sub

        ''' <summary>
        ''' Work item handed to a conversion thread.
        ''' </summary>
        Public Class TArgument
            ''' <summary>Path of the PDF file to convert.</summary>
            Public Property PdfFile() As String
            ''' <summary>Path of the HTML file to produce.</summary>
            Public Property HtmlFile() As String
            ''' <summary>1-based number of the page to convert.</summary>
            Public Property PageNumber() As Integer
        End Class

        ''' <summary>
        ''' Starts one thread per "*.pdf" file in the source directory, waits for all
        ''' of them to finish and then opens the output directory.
        ''' </summary>
        Public Shared Sub ConvertPdfToHtmlInThread()
            Dim pdfDir As String = "..\"
            Dim pdfFiles() As String = Directory.GetFiles(pdfDir, "*.pdf")
            Dim htmlDir As New DirectoryInfo("HTML results")
            If Not htmlDir.Exists Then
                htmlDir.Create()
            End If

            Dim threads As New List(Of Thread)()
            For Each pdfFile As String In pdfFiles
                Dim targ As New TArgument() With {
                    .PdfFile = pdfFile,
                    .HtmlFile = Path.Combine(htmlDir.FullName, Path.GetFileNameWithoutExtension(pdfFile) & ".html"),
                    .PageNumber = 1
                }
                Dim t = New Thread(Sub(a) ConvertToHtml(a))
                t.Start(targ)
                threads.Add(t)
            Next pdfFile

            ' Wait until every conversion has finished before reporting.
            For Each worker As Thread In threads
                worker.Join()
            Next worker
            Console.WriteLine("Done.")
            ' Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(htmlDir.FullName) With {.UseShellExecute = True})
        End Sub

        ''' <summary>
        ''' Thread entry point: converts a single page of one PDF file to HTML and
        ''' prints "Done!" or "Error!" for that file.
        ''' </summary>
        ''' <param name="targ">A <see cref="TArgument"/> describing the work item.</param>
        Public Shared Sub ConvertToHtml(ByVal targ As Object)
            Dim targum As TArgument = DirectCast(targ, TArgument)
            Dim pdfFile As String = targum.PdfFile
            Dim page As Integer = targum.PageNumber
            Dim htmlFile As String = targum.HtmlFile
            Dim f As New SautinSoft.PdfFocus()

            f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto
            f.HtmlOptions.IncludeImageInHtml = False
            f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile))
            f.HtmlOptions.Title = String.Format("This document was produced from {0}.", Path.GetFileName(pdfFile))
            f.HtmlOptions.ImageFileName = "picture"

            f.OpenPdf(pdfFile)

            Dim done As Boolean = False
            If f.PageCount > 0 Then
                ' Pages are numbered 1..PageCount, so the last page is valid.
                ' Fall back to the first page only when the request is out of range.
                If page < 1 OrElse page > f.PageCount Then
                    page = 1
                End If
                If f.ToHtml(htmlFile, page, page) = 0 Then
                    done = True
                End If
                f.ClosePdf()
            End If
            If done Then
                Console.WriteLine("{0}" & vbTab & " - Done!", Path.GetFileName(pdfFile))
            Else
                Console.WriteLine("{0}" & vbTab & " - Error!", Path.GetFileName(pdfFile))
            End If
        End Sub
    End Class
End Namespace

Конвертирование PDF в отдельные HTML-страницы

using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Sample: convert each page of a PDF document to its own HTML file.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Converts every page of the PDF to "Page{N}.html" inside the "htmls"
        /// directory and opens the first and the last page.
        /// </summary>
        static void Main(string[] args)
        {
            // Convert PDF to separate HTMLs.
            // Each PDF page will be converted to a single HTML document.
            string pdfFile = @"..\..\simple text.pdf";
            DirectoryInfo htmlDir = new DirectoryInfo("htmls");
            if (!htmlDir.Exists)
                htmlDir.Create();

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            // This property is necessary only for licensed version.
            //f.Serial = "XXXXXXXXXXX";

            f.HtmlOptions.IncludeImageInHtml = false;

            // Path (must exist) to a directory to store images after converting.
            f.HtmlOptions.ImageFolder = htmlDir.FullName;
            f.OpenPdf(pdfFile);
            if (f.PageCount > 0)
            {
                // Convert each PDF page to separate HTML document:
                // Page1.html, Page2.html ... PageN.html.
                for (int page = 1; page <= f.PageCount; page++)
                {
                    f.HtmlOptions.Title = $"Page {page}";
                    f.HtmlOptions.ImageSubFolder = String.Format("page{0}_images", page);
                    string htmlString = f.ToHtml(page, page);

                    // The other samples treat a null result as "nothing to save".
                    // Do the same here instead of writing and opening an empty file.
                    if (htmlString == null)
                    {
                        Console.WriteLine("Page {0}: conversion returned no HTML, skipped.", page);
                        continue;
                    }

                    // Save htmlString to file
                    string htmlFile = Path.Combine(htmlDir.FullName, $"Page{page}.html");
                    File.WriteAllText(htmlFile, htmlString);

                    // Let's open only 1st and last pages.
                    if (page == 1 || page == f.PageCount)
                    {
                        // Open the result for demonstration purposes.
                        System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlFile) { UseShellExecute = true });
                    }
                }
            }
        }
    }
}
Imports System
Imports System.IO

Namespace Sample
    ''' <summary>
    ''' Sample: convert each page of a PDF document to its own HTML file.
    ''' </summary>
    Friend Class Sample
        ''' <summary>
        ''' Converts every page of the PDF to "Page{N}.html" inside the "htmls"
        ''' directory and opens the first and the last page.
        ''' </summary>
        Shared Sub Main(ByVal args() As String)
            ' Convert PDF to separate HTMLs.
            ' Each PDF page will be converted to a single HTML document.
            Dim pdfFile As String = "..\simple text.pdf"
            Dim htmlDir As New DirectoryInfo("htmls")
            If Not htmlDir.Exists Then
                htmlDir.Create()
            End If

            Dim f As New SautinSoft.PdfFocus()
            ' This property is necessary only for licensed version.
            'f.Serial = "XXXXXXXXXXX"

            f.HtmlOptions.IncludeImageInHtml = False

            ' Path (must exist) to a directory to store images after converting.
            f.HtmlOptions.ImageFolder = htmlDir.FullName
            f.OpenPdf(pdfFile)
            If f.PageCount > 0 Then
                ' Convert each PDF page to separate HTML document:
                ' Page1.html, Page2.html ... PageN.html.
                For page As Integer = 1 To f.PageCount
                    f.HtmlOptions.Title = $"Page {page}"
                    f.HtmlOptions.ImageSubFolder = String.Format("page{0}_images", page)
                    Dim htmlString As String = f.ToHtml(page, page)

                    ' The other samples treat a Nothing result as "nothing to save".
                    ' Do the same here instead of writing and opening an empty file.
                    If htmlString Is Nothing Then
                        Console.WriteLine("Page {0}: conversion returned no HTML, skipped.", page)
                        Continue For
                    End If

                    ' Save htmlString to file
                    Dim htmlFile As String = Path.Combine(htmlDir.FullName, $"Page{page}.html")
                    File.WriteAllText(htmlFile, htmlString)

                    ' Let's open only 1st and last pages.
                    If page = 1 OrElse page = f.PageCount Then
                        ' Open the result for demonstration purposes.
                        System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(htmlFile) With {.UseShellExecute = True})
                    End If
                Next page
            End If
        End Sub
    End Class
End Namespace

Как указать расположение изображений во время преобразования PDF в HTML

using System;
using System.IO;
using SautinSoft;
namespace Sample
{
    /// <summary>
    /// Sample: the ways of storing images when converting PDF to HTML.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Converts a PDF file to HTML, storing images as separate JPEG files
        /// ("Way 2"); the alternative configurations are kept as commented code.
        /// </summary>
        static void Main(string[] args)
        {
            // Here you will find how to keep images in the resulting HTML document.
            string pdfFile = @"..\..\simple text.pdf";
            string htmlFile = "Result.html";

            // Path.GetDirectoryName returns an empty string for a bare file name
            // such as "Result.html", so resolve the full path first to obtain a
            // real directory for the image folder.
            string htmlDir = Path.GetDirectoryName(Path.GetFullPath(htmlFile));

            // Convert PDF file to HTML file
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            // After purchasing the license, please insert your serial number here to activate the component:
            //f.Serial = "XXXXXXXXXXX";

            // Way 1 (default): Images will be stored inside HTML document as base64, jpeg images.
            /*
                f.HtmlOptions.IncludeImageInHtml = true;
                // Auto - the same image format as in the source PDF;
                // 'Jpeg' to make the document size less;
                // 'PNG' to keep the highest quality, but the highest size too.
                f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Jpeg;
            */

            // Way 2: Images will be stored as JPG files in a special folder "{pdf name}_images".
            // Images will have names "picture100.jpg", "picture101.jpg" .. "pictureN.jpg".
            // Let's set the quality for jpeg to 95 percents.
            f.HtmlOptions.ImageFolder = htmlDir;

            // Auto - the same image format as in the source PDF;
            // 'Jpeg' to make the document size less;
            // 'PNG' to keep the highest quality, but the highest size too.
            f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Jpeg;
            f.EmbeddedJpegQuality = 95;
            f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));
            f.HtmlOptions.ImageFileName = "picture";
            f.HtmlOptions.ImageNumStart = 100;
            f.HtmlOptions.IncludeImageInHtml = false;

            // Way 3: Images will be stored as PNG files in the same directory with the HTML file.
            // All images on each page will be combined in a single image.
            /*
                f.HtmlOptions.ImageFolder = htmlDir;
                // 'Jpeg' to make the document size less; Or 'PNG' to keep the highest quality.
                f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Png;
                f.HtmlOptions.ImageSubFolder = "";
                f.HtmlOptions.IncludeImageInHtml = false;
            */

            f.OpenPdf(pdfFile);
            if (f.PageCount > 0)
            {
                int res = f.ToHtml(htmlFile);
                // Open the result for demonstration purposes.
                if (res == 0)
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlFile) { UseShellExecute = true });
            }
        }
    }
}
Imports System
Imports System.IO
Imports SautinSoft

Namespace Sample
    ''' <summary>
    ''' Sample: the ways of storing images when converting PDF to HTML.
    ''' </summary>
    Friend Class Sample
        ''' <summary>
        ''' Converts a PDF file to HTML, storing images as separate JPEG files
        ''' ("Way 2"); the alternative configurations are kept as commented code.
        ''' </summary>
        Shared Sub Main(ByVal args() As String)
            ' Here you will find how to keep images in the resulting HTML document.
            Dim pdfFile As String = "..\simple text.pdf"
            Dim htmlFile As String = "Result.html"

            ' Path.GetDirectoryName returns an empty string for a bare file name
            ' such as "Result.html", so resolve the full path first to obtain a
            ' real directory for the image folder.
            Dim htmlDir As String = Path.GetDirectoryName(Path.GetFullPath(htmlFile))

            ' Convert PDF file to HTML file
            Dim f As New SautinSoft.PdfFocus()

            ' After purchasing the license, please insert your serial number here to activate the component:
            'f.Serial = "XXXXXXXXXXX"

            ' Way 1 (default): Images will be stored inside HTML document as base64, jpeg images.
            'f.HtmlOptions.IncludeImageInHtml = True
            ' Auto - the same image format as in the source PDF;
            ' 'Jpeg' to make the document size less;
            ' 'PNG' to keep the highest quality, but the highest size too.
            'f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto

            ' Way 2: Images will be stored as JPG files in a special folder "{pdf name}_images".
            ' Images will have names "picture100.jpg", "picture101.jpg" .. "pictureN.jpg".
            ' Let's set the quality for jpeg to 95 percents.
            f.HtmlOptions.ImageFolder = htmlDir

            ' 'Jpeg' to make the document size less; Or 'PNG' to keep the highest quality.
            f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Jpeg
            f.EmbeddedJpegQuality = 95
            f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile))
            f.HtmlOptions.ImageFileName = "picture"
            f.HtmlOptions.ImageNumStart = 100
            f.HtmlOptions.IncludeImageInHtml = False

            ' Way 3: Images will be stored as PNG files in the same directory with the HTML file.
            ' All images on each page will be combined in a single image.
            'f.HtmlOptions.ImageFolder = htmlDir
            ' 'Jpeg' to make the document size less; Or 'PNG' to keep the highest quality.
            'f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Png
            'f.HtmlOptions.ImageSubFolder = ""
            'f.HtmlOptions.IncludeImageInHtml = False

            f.OpenPdf(pdfFile)
            If f.PageCount > 0 Then
                Dim res As Integer = f.ToHtml(htmlFile)
                ' Open the result for demonstration purposes.
                If res = 0 Then
                    System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(htmlFile) With {.UseShellExecute = True})
                End If
            End If
        End Sub
    End Class
End Namespace

Разница между фиксированным (Fixed) и потоковым (Flowing) HTML

using System;
using System.IO;
using SautinSoft;
namespace Sample
{
    /// <summary>
    /// Sample: the two PDF-to-HTML render modes, Fixed and Flowing.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Converts one PDF twice, first in Fixed and then in Flowing mode,
        /// opening each output file whose conversion returned zero.
        /// </summary>
        static void Main(string[] args)
        {
            // The sample authors describe the two modes as follows.
            //
            // HTML-Fixed (default): best for rendering, it repeats the PDF layout
            // and page structure; the markup is complex, with many tags
            // positioned by (x,y) coordinates.
            //
            // HTML-Flowing: best for further editing and combining by a human;
            // the markup is much simpler and flows, but the result does not match
            // the input PDF pixel by pixel.

            string sourcePdf = @"..\..\License.pdf";
            string fixedOutput = "Fixed.html";
            string flowingOutput = "Flowing.html";

            var converter = new SautinSoft.PdfFocus();
            // After purchasing the license, insert your serial number here to activate the component:
            //converter.Serial = "XXXXXXXXXXX";

            // Image storage: true keeps images inside the HTML as base64,
            // false links them as separate image files.
            converter.HtmlOptions.IncludeImageInHtml = true;
            converter.OpenPdf(sourcePdf);

            // Nothing to convert when the document has no pages.
            if (converter.PageCount <= 0)
            {
                return;
            }

            // Pass 1: the HTML-Fixed mode.
            converter.HtmlOptions.Title = "Fixed";
            converter.HtmlOptions.RenderMode = PdfFocus.CHtmlOptions.eHtmlRenderMode.Fixed;
            SaveAndOpen(converter, fixedOutput);

            // Pass 2: the HTML-Flowing mode.
            converter.HtmlOptions.Title = "Flowing";
            converter.HtmlOptions.RenderMode = PdfFocus.CHtmlOptions.eHtmlRenderMode.Flowing;

            // Character scaling and spacing are switched off to prevent
            // extra tags that split the text into parts.
            converter.HtmlOptions.KeepCharScaleAndSpacing = false;
            SaveAndOpen(converter, flowingOutput);
        }

        // Writes the HTML file and, when the converter returns zero, opens it with the shell.
        private static void SaveAndOpen(PdfFocus converter, string htmlPath)
        {
            int status = converter.ToHtml(htmlPath);
            if (status != 0)
            {
                return;
            }

            var launch = new System.Diagnostics.ProcessStartInfo(htmlPath) { UseShellExecute = true };
            System.Diagnostics.Process.Start(launch);
        }
    }
}
Imports System
Imports System.IO
Imports SautinSoft

Namespace Sample
    ''' <summary>
    ''' Sample: the two PDF-to-HTML render modes, Fixed and Flowing.
    ''' </summary>
    Friend Class Sample
        ''' <summary>
        ''' Converts one PDF twice, first in Fixed and then in Flowing mode,
        ''' opening each output file whose conversion returned zero.
        ''' </summary>
        Shared Sub Main(ByVal args() As String)
            ' The sample authors describe the two modes as follows.
            '
            ' HTML-Fixed (default): best for rendering, it repeats the PDF layout
            ' and page structure; the markup is complex, with many tags
            ' positioned by (x,y) coordinates.
            '
            ' HTML-Flowing: best for further editing and combining by a human;
            ' the markup is much simpler and flows, but the result does not match
            ' the input PDF pixel by pixel.

            Dim sourcePdf As String = "..\License.pdf"
            Dim fixedOutput As String = "Fixed.html"
            Dim flowingOutput As String = "Flowing.html"

            Dim converter As New SautinSoft.PdfFocus()

            ' After purchasing the license, insert your serial number here to activate the component:
            'converter.Serial = "XXXXXXXXXXX"

            ' Image storage: True keeps images inside the HTML as base64,
            ' False links them as separate image files.
            converter.HtmlOptions.IncludeImageInHtml = True
            converter.OpenPdf(sourcePdf)

            ' Nothing to convert when the document has no pages.
            If converter.PageCount <= 0 Then
                Return
            End If

            ' Pass 1: the HTML-Fixed mode.
            converter.HtmlOptions.Title = "Fixed"
            converter.HtmlOptions.RenderMode = PdfFocus.CHtmlOptions.eHtmlRenderMode.Fixed
            SaveAndOpen(converter, fixedOutput)

            ' Pass 2: the HTML-Flowing mode.
            converter.HtmlOptions.Title = "Flowing"
            converter.HtmlOptions.RenderMode = PdfFocus.CHtmlOptions.eHtmlRenderMode.Flowing

            ' Character scaling and spacing are switched off to prevent
            ' extra tags that split the text into parts.
            converter.HtmlOptions.KeepCharScaleAndSpacing = False
            SaveAndOpen(converter, flowingOutput)
        End Sub

        ' Writes the HTML file and, when the converter returns zero, opens it with the shell.
        Private Shared Sub SaveAndOpen(ByVal converter As PdfFocus, ByVal htmlPath As String)
            Dim status As Integer = converter.ToHtml(htmlPath)
            If status <> 0 Then
                Return
            End If

            Dim launch As New System.Diagnostics.ProcessStartInfo(htmlPath) With {.UseShellExecute = True}
            System.Diagnostics.Process.Start(launch)
        End Sub
    End Class
End Namespace

Другие примеры кода SautinSoft.PdfFocus

PDF в Word ✦ PDF в HTML PDF в Img Image из PDF PDF в Excel PDF в XML PDF в Text PDF во Всё
 ВВЕРХ