Как конвертировать DOC (Word 97-2003) в текст - SautinSoft.Document

  1. Добавьте SautinSoft.Document из NuGet.
  2. Загрузите входной документ.
  3. Сохраните в текстовый формат.

SautinSoft.Document может помочь Вашему приложению преобразовать документ из одного формата в другой.
Вам достаточно загрузить документ методом Load() и сохранить его в нужном формате методом Save():


            DocumentCore dc = DocumentCore.Load(inpFile);
            dc.Save(outFile);

SautinSoft.Document поддерживает форматы:

| PDF | DOCX | RTF | HTML | Текст | Изображения |
| --- | --- | --- | --- | --- | --- |
| Create/Read/Write | Create/Read/Write | Create/Read/Write | Create/Read/Write | Create/Read/Write | Create/Read(OCR)/Write |

Полный код

using System.IO;
using SautinSoft.Document;

namespace Example
{
    /// <summary>
    /// Sample program showing two ways to convert a DOC (Word 97-2003) document
    /// to plain text with SautinSoft.Document: file to file, and through streams.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: runs the file-based sample, then the stream-based sample.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Get your free 30-day key here:
            // https://sautinsoft.com/start-for-free/

            ConvertFromFile();
            ConvertFromStream();
        }

        /// <summary>
        /// Convert DOC (Word 97-2003) to Text (file to file).
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/convert-doc-word-97-2003-to-text-in-csharp-vb.php
        /// </remarks>
        static void ConvertFromFile()
        {
            string sourcePath = @"..\..\..\example.doc";
            string resultPath = @"Result.txt";

            // NOTE(review): no save options are passed here; presumably the library
            // picks the output format from the ".txt" extension - confirm in its docs.
            DocumentCore.Load(sourcePath).Save(resultPath);

            // Open the result for demonstration purposes.
            ShowResult(resultPath);
        }

        /// <summary>
        /// Convert DOC (Word 97-2003) to Text (using Stream).
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/convert-doc-word-97-2003-to-text-in-csharp-vb.php
        /// </remarks>
        static void ConvertFromStream()
        {
            // Files are touched only to feed the demo and to display its output;
            // the load and save calls themselves operate on memory streams.
            string sourcePath = @"..\..\..\example.doc";
            string resultPath = @"ResultStream.txt";
            byte[] sourceBytes = File.ReadAllBytes(sourcePath);
            byte[] textBytes = null;

            using (var inStream = new MemoryStream(sourceBytes))
            {
                // Load the document, stating explicitly that the input is DOC.
                DocumentCore document = DocumentCore.Load(inStream, new DocLoadOptions());

                // Save it as text into a second in-memory stream and copy the bytes out.
                using (var outStream = new MemoryStream())
                {
                    document.Save(outStream, new TxtSaveOptions());
                    textBytes = outStream.ToArray();
                }

                // Nothing to show if no bytes were produced.
                if (textBytes == null)
                {
                    return;
                }

                // Write the bytes to disk and open them for demonstration purposes.
                File.WriteAllBytes(resultPath, textBytes);
                ShowResult(resultPath);
            }
        }

        /// <summary>
        /// Starts a process for the given file with <c>UseShellExecute</c> enabled,
        /// so the result can be viewed.
        /// </summary>
        /// <param name="filePath">Path of the file to open.</param>
        static void ShowResult(string filePath)
        {
            var startInfo = new System.Diagnostics.ProcessStartInfo(filePath) { UseShellExecute = true };
            System.Diagnostics.Process.Start(startInfo);
        }
    }
}

Download

Imports System
Imports System.IO
Imports SautinSoft.Document

Module Sample
    ''' <summary>
    ''' Entry point: runs the file-based sample, then the stream-based sample.
    ''' </summary>
    Sub Main()
        ' Get your free 30-day key here:
        ' https://sautinsoft.com/start-for-free/

        ConvertFromFile()
        ConvertFromStream()
    End Sub

    ''' <summary>
    ''' Convert DOC (Word 97-2003) to Text (file to file).
    ''' </summary>
    ''' <remarks>
    ''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/convert-doc-word-97-2003-to-text-in-csharp-vb.php
    ''' </remarks>
    Sub ConvertFromFile()
        Dim sourcePath As String = "..\..\..\example.doc"
        Dim resultPath As String = "Result.txt"

        ' NOTE(review): no save options are passed here; presumably the library
        ' picks the output format from the ".txt" extension - confirm in its docs.
        DocumentCore.Load(sourcePath).Save(resultPath)

        ' Open the result for demonstration purposes.
        ShowResult(resultPath)
    End Sub

    ''' <summary>
    ''' Convert DOC (Word 97-2003) to Text (using Stream).
    ''' </summary>
    ''' <remarks>
    ''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/convert-doc-word-97-2003-to-text-in-csharp-vb.php
    ''' </remarks>
    Sub ConvertFromStream()
        ' Files are touched only to feed the demo and to display its output;
        ' the load and save calls themselves operate on memory streams.
        Dim sourcePath As String = "..\..\..\example.doc"
        Dim resultPath As String = "ResultStream.txt"
        Dim sourceBytes As Byte() = File.ReadAllBytes(sourcePath)
        Dim textBytes As Byte() = Nothing

        Using inStream As New MemoryStream(sourceBytes)
            ' Load the document, stating explicitly that the input is DOC.
            Dim document As DocumentCore = DocumentCore.Load(inStream, New DocLoadOptions())

            ' Save it as text into a second in-memory stream and copy the bytes out.
            Using outStream As New MemoryStream()
                document.Save(outStream, New TxtSaveOptions())
                textBytes = outStream.ToArray()
            End Using

            ' Nothing to show if no bytes were produced.
            If textBytes Is Nothing Then
                Return
            End If

            ' Write the bytes to disk and open them for demonstration purposes.
            File.WriteAllBytes(resultPath, textBytes)
            ShowResult(resultPath)
        End Using
    End Sub

    ''' <summary>
    ''' Starts a process for the given file with UseShellExecute enabled,
    ''' so the result can be viewed.
    ''' </summary>
    ''' <param name="filePath">Path of the file to open.</param>
    Private Sub ShowResult(filePath As String)
        Dim startInfo As New System.Diagnostics.ProcessStartInfo(filePath) With {.UseShellExecute = True}
        System.Diagnostics.Process.Start(startInfo)
    End Sub
End Module

Download


Если вам нужен пример кода или у вас есть вопрос, напишите нам по адресу [email protected], спросите в онлайн-чате (правый нижний угол этой страницы) или воспользуйтесь формой ниже:



Вопросы и предложения всегда приветствуются!

Мы разрабатываем компоненты .NET с 2002 года и хорошо знаем форматы PDF, DOCX, RTF, HTML, XLSX и форматы изображений. Если вам нужна помощь в создании, изменении или преобразовании документов в различных форматах, мы можем вам помочь. Мы напишем для вас любой пример кода абсолютно бесплатно.