Разница между фиксированным (Fixed) и потоковым (Flowing) HTML на C# и .NET


Полный код

using System;
using System.IO;
using SautinSoft;

namespace Sample
{
    /// <summary>
    /// Demonstrates the two PDF-to-HTML render modes offered by PDF Focus .Net: Fixed and Flowing.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Converts "License.pdf" into "Fixed.html" and "Flowing.html" and opens every
        /// file whose conversion returned 0 with the shell's default application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Before starting, we recommend getting a free 100-day key:
            // https://sautinsoft.com/start-for-free/

            // Apply the key here:
            // SautinSoft.PdfFocus.SetLicense("...");

            // This sample shows the two modes of converting PDF to HTML:
            // PDF Focus .Net offers the Fixed and Flowing modes.

            // HTML-Fixed (default) is better for rendering, because it completely
            // repeats the PDF layout including the page structure.
            // The markup of such documents is very complex and has a lot of tags styled by (x,y) coords.

            // HTML-Flowing is better for further processing by a human: editing and combining.
            // The markup of such documents is much simpler and has a flowing structure,
            // so it is easy for a human to understand.
            // However, the resulting HTML document does not match the input PDF pixel by pixel.

            string pdfFile = Path.GetFullPath(@"..\..\..\License.pdf");
            string htmlFileFixed = "Fixed.html";
            string htmlFileFlowing = "Flowing.html";

            // Convert the PDF file to HTML (Fixed and Flowing) files.
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // How to store images: inside the HTML document as base64 images or as linked separate image files.
            f.HtmlOptions.IncludeImageInHtml = true;

            f.OpenPdf(pdfFile);

            try
            {
                if (f.PageCount > 0)
                {
                    // The HTML-Fixed mode.
                    f.HtmlOptions.Title = "Fixed";
                    f.HtmlOptions.RenderMode = PdfFocus.CHtmlOptions.eHtmlRenderMode.Fixed;

                    // A return value of 0 is treated as a successful conversion.
                    if (f.ToHtml(htmlFileFixed) == 0)
                    {
                        OpenWithDefaultApp(htmlFileFixed);
                    }

                    // The HTML-Flowing mode.
                    f.HtmlOptions.Title = "Flowing";
                    f.HtmlOptions.RenderMode = PdfFocus.CHtmlOptions.eHtmlRenderMode.Flowing;

                    // Switch off character scaling and spacing to prevent
                    // extra tags from splitting the text into parts.
                    f.HtmlOptions.KeepCharScaleAndSpacing = false;

                    if (f.ToHtml(htmlFileFlowing) == 0)
                    {
                        OpenWithDefaultApp(htmlFileFlowing);
                    }
                }
            }
            finally
            {
                // Release the opened PDF even when a conversion throws,
                // so the input file is not kept open by this process.
                f.ClosePdf();
            }
        }

        /// <summary>
        /// Starts the given file through the operating-system shell (UseShellExecute = true).
        /// </summary>
        /// <param name="fileName">Path of the file to open.</param>
        private static void OpenWithDefaultApp(string fileName)
        {
            System.Diagnostics.ProcessStartInfo startInfo =
                new System.Diagnostics.ProcessStartInfo(fileName) { UseShellExecute = true };
            System.Diagnostics.Process.Start(startInfo);
        }
    }
}

Download

Imports System
Imports System.IO
Imports SautinSoft

Namespace Sample
    ''' <summary>
    ''' Demonstrates the two PDF-to-HTML render modes offered by PDF Focus .Net: Fixed and Flowing.
    ''' </summary>
    Friend Class Sample
        ''' <summary>
        ''' Converts "License.pdf" into "Fixed.html" and "Flowing.html" and opens every
        ''' file whose conversion returned 0 with the shell's default application.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Shared Sub Main(ByVal args() As String)
            ' Before starting, we recommend getting a free 100-day key:
            ' https://sautinsoft.com/start-for-free/

            ' Apply the key here:
            ' SautinSoft.PdfFocus.SetLicense("...")

            ' This sample shows the two modes of converting PDF to HTML:
            ' PDF Focus .Net offers the Fixed and Flowing modes.

            ' HTML-Fixed (default) is better for rendering, because it completely
            ' repeats the PDF layout including the page structure.
            ' The markup of such documents is very complex and has a lot of tags styled by (x,y) coords.

            ' HTML-Flowing is better for further processing by a human: editing and combining.
            ' The markup of such documents is much simpler and has a flowing structure,
            ' so it is easy for a human to understand.
            ' However, the resulting HTML document does not match the input PDF pixel by pixel.

            Dim pdfFile As String = Path.GetFullPath("..\..\..\License.pdf")
            Dim htmlFileFixed As String = "Fixed.html"
            Dim htmlFileFlowing As String = "Flowing.html"

            ' Convert the PDF file to HTML (Fixed and Flowing) files.
            Dim f As New SautinSoft.PdfFocus()

            ' How to store images: inside the HTML document as base64 images or as linked separate image files.
            f.HtmlOptions.IncludeImageInHtml = True

            f.OpenPdf(pdfFile)

            Try
                If f.PageCount > 0 Then
                    ' The HTML-Fixed mode.
                    f.HtmlOptions.Title = "Fixed"
                    f.HtmlOptions.RenderMode = PdfFocus.CHtmlOptions.eHtmlRenderMode.Fixed

                    ' A return value of 0 is treated as a successful conversion.
                    If f.ToHtml(htmlFileFixed) = 0 Then
                        OpenWithDefaultApp(htmlFileFixed)
                    End If

                    ' The HTML-Flowing mode.
                    f.HtmlOptions.Title = "Flowing"
                    f.HtmlOptions.RenderMode = PdfFocus.CHtmlOptions.eHtmlRenderMode.Flowing

                    ' Switch off character scaling and spacing to prevent
                    ' extra tags from splitting the text into parts.
                    f.HtmlOptions.KeepCharScaleAndSpacing = False

                    If f.ToHtml(htmlFileFlowing) = 0 Then
                        OpenWithDefaultApp(htmlFileFlowing)
                    End If
                End If
            Finally
                ' Release the opened PDF even when a conversion throws,
                ' so the input file is not kept open by this process.
                f.ClosePdf()
            End Try
        End Sub

        ''' <summary>
        ''' Starts the given file through the operating-system shell (UseShellExecute = True).
        ''' </summary>
        ''' <param name="fileName">Path of the file to open.</param>
        Private Shared Sub OpenWithDefaultApp(ByVal fileName As String)
            Dim startInfo As New System.Diagnostics.ProcessStartInfo(fileName) With {.UseShellExecute = True}
            System.Diagnostics.Process.Start(startInfo)
        End Sub
    End Class
End Namespace

Download


Если вам нужен пример кода или у вас есть вопрос, напишите нам по адресу support@sautinsoft.com, спросите в онлайн-чате (правый нижний угол этой страницы) или используйте форму ниже:



Вопросы и предложения всегда приветствуются!

Мы разрабатываем компоненты .NET с 2002 года. Мы знаем форматы PDF, DOCX, RTF, HTML, XLSX и форматы изображений. Если вам нужна помощь в создании, изменении или преобразовании документов в различных форматах, обращайтесь к нам. Мы напишем для вас любой пример кода абсолютно бесплатно.