Как преобразовать PDF в отдельные HTML-страницы на C# и .NET
Полный код
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Sample showing how to split a PDF into one HTML file per page
    /// using SautinSoft.PdfFocus.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Converts every page of "simple text.pdf" into its own HTML document
        /// inside the "htmls" directory, then opens the first and the last
        /// generated pages through the OS shell.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Before starting, we recommend to get a free 100-day key:
            // https://sautinsoft.com/start-for-free/

            // Apply the key here:
            // SautinSoft.PdfFocus.SetLicense("...");

            // Convert PDF to separate HTMLs.
            // Each PDF page will be converted to a single HTML document.
            string pdfFile = Path.GetFullPath(@"..\..\..\simple text.pdf");

            DirectoryInfo htmlDir = new DirectoryInfo("htmls");
            if (!htmlDir.Exists)
            {
                htmlDir.Create();
            }

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            try
            {
                // Keep images as separate files instead of embedding them in the HTML.
                f.HtmlOptions.IncludeImageInHtml = false;

                // Path (must exist) to a directory to store images after converting.
                f.HtmlOptions.ImageFolder = htmlDir.FullName;

                f.OpenPdf(pdfFile);

                if (f.PageCount > 0)
                {
                    // Convert each PDF page to a separate HTML document:
                    // Page1.html, Page2.html ... PageN.html.
                    for (int page = 1; page <= f.PageCount; page++)
                    {
                        f.HtmlOptions.Title = $"Page {page}";

                        // Give every page its own image sub-folder so the pages
                        // do not share one image directory.
                        f.HtmlOptions.ImageSubFolder = $"page{page}_images";

                        // Passing the same number as first and last page converts a single page.
                        string htmlString = f.ToHtml(page, page);

                        // Save htmlString to file.
                        string htmlFile = Path.Combine(htmlDir.FullName, $"Page{page}.html");
                        File.WriteAllText(htmlFile, htmlString);

                        // Let's open only 1st and last pages.
                        if (page == 1 || page == f.PageCount)
                        {
                            // Open the result for demonstration purposes.
                            System.Diagnostics.Process.Start(
                                new System.Diagnostics.ProcessStartInfo(htmlFile) { UseShellExecute = true });
                        }
                    }
                }
            }
            finally
            {
                // Always release the opened PDF, even if converting or saving a page throws.
                f.ClosePdf();
            }
        }
    }
}
Imports System
Imports System.IO
Namespace Sample
    ''' <summary>
    ''' Sample showing how to split a PDF into one HTML file per page
    ''' using SautinSoft.PdfFocus.
    ''' </summary>
    Friend Class Sample
        ''' <summary>
        ''' Converts every page of "simple text.pdf" into its own HTML document
        ''' inside the "htmls" directory, then opens the first and the last
        ''' generated pages through the OS shell.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Shared Sub Main(ByVal args() As String)
            ' Before starting, we recommend to get a free 100-day key:
            ' https://sautinsoft.com/start-for-free/

            ' Apply the key here:
            ' SautinSoft.PdfFocus.SetLicense("...")

            ' Convert PDF to separate HTMLs.
            ' Each PDF page will be converted to a single HTML document.
            Dim pdfFile As String = Path.GetFullPath("..\..\..\simple text.pdf")

            Dim htmlDir As New DirectoryInfo("htmls")
            If Not htmlDir.Exists Then
                htmlDir.Create()
            End If

            Dim f As New SautinSoft.PdfFocus()
            Try
                ' Keep images as separate files instead of embedding them in the HTML.
                f.HtmlOptions.IncludeImageInHtml = False

                ' Path (must exist) to a directory to store images after converting.
                f.HtmlOptions.ImageFolder = htmlDir.FullName

                f.OpenPdf(pdfFile)

                If f.PageCount > 0 Then
                    ' Convert each PDF page to a separate HTML document:
                    ' Page1.html, Page2.html ... PageN.html.
                    For page As Integer = 1 To f.PageCount
                        f.HtmlOptions.Title = $"Page {page}"

                        ' Give every page its own image sub-folder so the pages
                        ' do not share one image directory.
                        f.HtmlOptions.ImageSubFolder = $"page{page}_images"

                        ' Passing the same number as first and last page converts a single page.
                        Dim htmlString As String = f.ToHtml(page, page)

                        ' Save htmlString to file.
                        Dim htmlFile As String = Path.Combine(htmlDir.FullName, $"Page{page}.html")
                        File.WriteAllText(htmlFile, htmlString)

                        ' Let's open only 1st and last pages.
                        If page = 1 OrElse page = f.PageCount Then
                            ' Open the result for demonstration purposes.
                            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(htmlFile) With {.UseShellExecute = True})
                        End If
                    Next page
                End If
            Finally
                ' Always release the opened PDF, even if converting or saving a page throws.
                f.ClosePdf()
            End Try
        End Sub
    End Class
End Namespace
Если вам нужен пример кода или у вас есть вопрос, напишите нам по адресу support@sautinsoft.com, спросите в онлайн-чате (правый нижний угол этой страницы) или воспользуйтесь формой ниже: