How to load an HTML document in C# and VB.Net

  1. Load from a file:
    
    // Load the HTML file from disk; no load options are passed here.
    DocumentCore dc = DocumentCore.Load(@"d:\Book.html");
    
    The dc object represents a document loaded into memory. The file format is detected automatically from the file extension: ".html".

    After loading, the document is represented as a tree of objects whose root node is the DocumentCore class.

    To guarantee that the content being loaded is really HTML, and to set loading options, pass HtmlLoadOptions as the second parameter.

    
    // Passing HtmlLoadOptions explicitly states that the file content is HTML.
    DocumentCore dc = DocumentCore.Load(@"d:\Book.html", new HtmlLoadOptions());
    
  2. Load from a Stream:
    
                // Assume the HTML document is already available as a byte array named 'htmlBytes'.
                DocumentCore dc = null;
                using (MemoryStream htmlStream = new MemoryStream(htmlBytes))
                {
                    // HtmlLoadOptions explicitly states that the stream content is HTML.
                    dc = DocumentCore.Load(htmlStream, new HtmlLoadOptions());
                }
                // From here on, the loaded document 'dc' can be processed as needed.
    
 

Complete code

using System.IO;
using SautinSoft.Document;

namespace Example
{
    /// <summary>
    /// Sample program showing two ways to load an HTML document into a
    /// <c>DocumentCore</c>: from a file path and from a stream.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Runs the load-from-file sample; the stream-based
        /// sample is left commented out and can be enabled instead.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            LoadHtmlFromFile();
            //LoadHtmlFromStream();
        }

        /// <summary>
        /// Loads the sample HTML document directly from a file path.
        /// </summary>
        static void LoadHtmlFromFile()
        {
            // No load options are passed: the format is detected automatically
            // from the ".html" file extension. HtmlLoadOptions could be supplied
            // as a second argument (see LoadHtmlFromStream) to state the format explicitly.
            DocumentCore document = DocumentCore.Load(@"..\..\example.html");
        }

        /// <summary>
        /// Loads the sample HTML document from an in-memory stream.
        /// </summary>
        static void LoadHtmlFromStream()
        {
            // Read the whole file into memory first.
            byte[] htmlBytes = File.ReadAllBytes(@"..\..\example.html");

            // Declared outside the using block so the document stays in scope
            // after the stream has been disposed.
            DocumentCore document;

            using (MemoryStream buffer = new MemoryStream(htmlBytes))
            {
                // HtmlLoadOptions explicitly marks the stream content as HTML.
                document = DocumentCore.Load(buffer, new HtmlLoadOptions());
            }
        }
    }
}

Download.

        
            Imports System.IO
Imports SautinSoft.Document

Module ExampleVB

    ''' <summary>
    ''' Entry point. Runs the load-from-file sample; the stream-based
    ''' sample is left commented out and can be enabled instead.
    ''' </summary>
    Sub Main()
        LoadHtmlFromFile()
        'LoadHtmlFromStream()
    End Sub

    ''' <summary>
    ''' Loads the sample HTML document directly from a file path.
    ''' </summary>
    Public Sub LoadHtmlFromFile()
        ' No load options are passed: the format is detected automatically
        ' from the ".html" file extension. HtmlLoadOptions could be supplied
        ' as a second argument (see LoadHtmlFromStream) to state the format explicitly.
        Dim document As DocumentCore = DocumentCore.Load("..\example.html")
    End Sub

    ''' <summary>
    ''' Loads the sample HTML document from an in-memory stream.
    ''' </summary>
    Public Sub LoadHtmlFromStream()
        ' Read the whole file into memory first.
        Dim htmlBytes As Byte() = File.ReadAllBytes("..\example.html")

        ' Declared outside the Using block so the document stays in scope
        ' after the stream has been disposed.
        Dim document As DocumentCore = Nothing

        Using buffer As MemoryStream = New MemoryStream(htmlBytes)
            ' HtmlLoadOptions explicitly marks the stream content as HTML.
            document = DocumentCore.Load(buffer, New HtmlLoadOptions())
        End Using
    End Sub
End Module

Download.

© SautinSoft 2019