Как преобразовать PDF в HTML в памяти на C# и .NET
Полный код
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample that converts a PDF document to HTML in memory with
    /// SautinSoft.PdfFocus, taking the PDF either as a byte array or as a stream.
    /// Files are touched only to obtain the demo input and to show the result.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Entry point: runs the byte-array variant of the conversion.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Before starting, we recommend to get a free 100-day key:
            // https://sautinsoft.com/start-for-free/
            // Apply the key here:
            // SautinSoft.PdfFocus.SetLicense("...");
            ConvertPdfBytesToHtml();
            //ConvertPdfStreamToHtml();
        }

        /// <summary>
        /// Converts a PDF held in a byte array to an HTML string, then writes
        /// the string to "Result.html" and opens it for demonstration.
        /// </summary>
        private static void ConvertPdfBytesToHtml()
        {
            // We need files only for demonstration purposes.
            // The whole conversion process will be done in memory.
            string pdfFile = GetSamplePdfPath();
            string htmlFile = "Result.html";

            SautinSoft.PdfFocus f = CreateConverter();

            // Assume that we already have the PDF as array of bytes.
            byte[] pdf = File.ReadAllBytes(pdfFile);
            f.OpenPdf(pdf);
            try
            {
                if (f.PageCount <= 0)
                {
                    Console.Error.WriteLine("No pages could be read from: " + pdfFile);
                    return;
                }

                // Convert PDF to HTML in memory.
                string html = f.ToHtml();
                if (html == null)
                {
                    Console.Error.WriteLine("The conversion produced no HTML for: " + pdfFile);
                    return;
                }

                // Save HTML to the file only for demonstration purpose.
                File.WriteAllText(htmlFile, html);
                OpenInDefaultViewer(htmlFile);
            }
            finally
            {
                // Release the loaded document even when the conversion fails.
                f.ClosePdf();
            }
        }

        /// <summary>
        /// Converts a PDF supplied as a stream into a MemoryStream holding the HTML,
        /// then writes those bytes to "Result.html" and opens it for demonstration.
        /// </summary>
        private static void ConvertPdfStreamToHtml()
        {
            // We need files only for demonstration purposes.
            // The whole conversion process will be done in memory.
            string pdfFile = GetSamplePdfPath();
            string htmlFile = "Result.html";

            // Get your free 100-day key here:
            // https://sautinsoft.com/start-for-free/
            SautinSoft.PdfFocus f = CreateConverter();

            // Assume that we have a PDF document as Stream.
            using (FileStream fs = File.OpenRead(pdfFile))
            {
                f.OpenPdf(fs);
                try
                {
                    if (f.PageCount <= 0)
                    {
                        Console.Error.WriteLine("No pages could be read from: " + pdfFile);
                        return;
                    }

                    // Convert PDF to HTML to a MemoryStream.
                    using (MemoryStream msHtml = new MemoryStream())
                    {
                        int res = f.ToHtml(msHtml);

                        // This sample treats a result code of 0 as success.
                        if (res != 0)
                        {
                            Console.Error.WriteLine("The conversion failed with result code " + res + ".");
                            return;
                        }

                        File.WriteAllBytes(htmlFile, msHtml.ToArray());
                        OpenInDefaultViewer(htmlFile);
                    }
                }
                finally
                {
                    // Release the document before the source stream is disposed.
                    f.ClosePdf();
                }
            }
        }

        /// <summary>
        /// Builds the full path of the demo PDF, three directories above the
        /// current directory.
        /// </summary>
        /// <returns>Absolute path to "simple text.pdf".</returns>
        private static string GetSamplePdfPath()
        {
            // Path.Combine inserts the platform's own separator; a hard-coded
            // back-slash is a directory separator on Windows only.
            return Path.GetFullPath(Path.Combine("..", "..", "..", "simple text.pdf"));
        }

        /// <summary>
        /// Creates a converter configured identically for both demo variants.
        /// </summary>
        /// <returns>A new PdfFocus instance with HTML options applied.</returns>
        private static SautinSoft.PdfFocus CreateConverter()
        {
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // Let's force the component to store images inside HTML document
            // using base-64 encoding.
            // Thus the component will not use HDD.
            f.HtmlOptions.IncludeImageInHtml = true;
            f.HtmlOptions.Title = "Simple text";
            return f;
        }

        /// <summary>
        /// Opens the given file through the operating system shell.
        /// </summary>
        /// <param name="path">Path of the file to open.</param>
        private static void OpenInDefaultViewer(string path)
        {
            // Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(path) { UseShellExecute = true });
        }
    }
}
Imports System
Imports System.IO
Namespace Sample
    ''' <summary>
    ''' Console sample that converts a PDF document to HTML in memory with
    ''' SautinSoft.PdfFocus, taking the PDF either as a byte array or as a stream.
    ''' Files are touched only to obtain the demo input and to show the result.
    ''' </summary>
    Friend Class Sample
        ''' <summary>
        ''' Entry point: runs the byte-array variant of the conversion.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Shared Sub Main(ByVal args() As String)
            ' Before starting, we recommend to get a free 100-day key:
            ' https://sautinsoft.com/start-for-free/
            ' Apply the key here:
            ' SautinSoft.PdfFocus.SetLicense("...")
            ConvertPdfBytesToHtml()
            'ConvertPdfStreamToHtml()
        End Sub

        ''' <summary>
        ''' Converts a PDF held in a byte array to an HTML string, then writes
        ''' the string to "Result.html" and opens it for demonstration.
        ''' </summary>
        Private Shared Sub ConvertPdfBytesToHtml()
            ' We need files only for demonstration purposes.
            ' The whole conversion process will be done in memory.
            Dim pdfFile As String = GetSamplePdfPath()
            Dim htmlFile As String = "Result.html"

            Dim f As SautinSoft.PdfFocus = CreateConverter()

            ' Assume that we already have the PDF as array of bytes.
            Dim pdf() As Byte = File.ReadAllBytes(pdfFile)
            f.OpenPdf(pdf)
            Try
                If f.PageCount <= 0 Then
                    Console.Error.WriteLine("No pages could be read from: " & pdfFile)
                    Return
                End If

                ' Convert PDF to HTML in memory.
                Dim html As String = f.ToHtml()
                If html Is Nothing Then
                    Console.Error.WriteLine("The conversion produced no HTML for: " & pdfFile)
                    Return
                End If

                ' Save HTML to the file only for demonstration purpose.
                File.WriteAllText(htmlFile, html)
                OpenInDefaultViewer(htmlFile)
            Finally
                ' Release the loaded document even when the conversion fails.
                f.ClosePdf()
            End Try
        End Sub

        ''' <summary>
        ''' Converts a PDF supplied as a stream into a MemoryStream holding the HTML,
        ''' then writes those bytes to "Result.html" and opens it for demonstration.
        ''' </summary>
        Private Shared Sub ConvertPdfStreamToHtml()
            ' We need files only for demonstration purposes.
            ' The whole conversion process will be done in memory.
            Dim pdfFile As String = GetSamplePdfPath()
            Dim htmlFile As String = "Result.html"

            ' Get your free 100-day key here:
            ' https://sautinsoft.com/start-for-free/
            Dim f As SautinSoft.PdfFocus = CreateConverter()

            ' Assume that we have a PDF document as Stream.
            Using fs As FileStream = File.OpenRead(pdfFile)
                f.OpenPdf(fs)
                Try
                    If f.PageCount <= 0 Then
                        Console.Error.WriteLine("No pages could be read from: " & pdfFile)
                        Return
                    End If

                    ' Convert PDF to HTML to a MemoryStream.
                    Using msHtml As New MemoryStream()
                        Dim res As Integer = f.ToHtml(msHtml)

                        ' This sample treats a result code of 0 as success.
                        If res <> 0 Then
                            Console.Error.WriteLine("The conversion failed with result code " & res.ToString() & ".")
                            Return
                        End If

                        File.WriteAllBytes(htmlFile, msHtml.ToArray())
                        OpenInDefaultViewer(htmlFile)
                    End Using
                Finally
                    ' Release the document before the source stream is disposed.
                    f.ClosePdf()
                End Try
            End Using
        End Sub

        ''' <summary>
        ''' Builds the full path of the demo PDF, three directories above the
        ''' current directory.
        ''' </summary>
        ''' <returns>Absolute path to "simple text.pdf".</returns>
        Private Shared Function GetSamplePdfPath() As String
            ' Path.Combine inserts the platform's own separator; a hard-coded
            ' back-slash is a directory separator on Windows only.
            Return Path.GetFullPath(Path.Combine("..", "..", "..", "simple text.pdf"))
        End Function

        ''' <summary>
        ''' Creates a converter configured identically for both demo variants.
        ''' </summary>
        ''' <returns>A new PdfFocus instance with HTML options applied.</returns>
        Private Shared Function CreateConverter() As SautinSoft.PdfFocus
            Dim f As New SautinSoft.PdfFocus()

            ' Let's force the component to store images inside HTML document
            ' using base-64 encoding.
            ' Thus the component will not use HDD.
            f.HtmlOptions.IncludeImageInHtml = True
            f.HtmlOptions.Title = "Simple text"
            Return f
        End Function

        ''' <summary>
        ''' Opens the given file through the operating system shell.
        ''' </summary>
        ''' <param name="path">Path of the file to open.</param>
        Private Shared Sub OpenInDefaultViewer(ByVal path As String)
            ' Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(path) With {.UseShellExecute = True})
        End Sub
    End Class
End Namespace
Если вам нужен пример кода или у вас есть вопрос, напишите нам по адресу support@sautinsoft.ru, спросите в онлайн-чате (правый нижний угол этой страницы) или воспользуйтесь формой ниже: