горизонтально расположенные полосы: белая, синяя, красная

Как конвертировать PDF в Word на C# и VB.NET

ASP.NET - Экспорт PDF в Word

Простой пример ASP.NET-приложения для экспорта PDF-документа в формат RTF. Преобразование выполняется в памяти, и полученный RTF-документ отображается в браузере клиента как встроенный объект.

using System;
/// <summary>
/// Web Forms page that converts an uploaded PDF to RTF in memory and
/// streams the resulting document back to the browser.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>Clears the status label on every request.</summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        Result.Text = "";
    }

    /// <summary>
    /// Validates the upload, converts the PDF to RTF and sends it to the client.
    /// Writes a message to the status label when nothing was uploaded or
    /// when no document was produced.
    /// </summary>
    protected void Button1_Click(object sender, EventArgs e)
    {
        // HasFile guards against a missing PostedFile, which was previously
        // dereferenced unconditionally. The upload is read once and reused.
        byte[] pdf = FileUpload1.HasFile ? FileUpload1.FileBytes : new byte[0];
        if (pdf.Length == 0 || FileUpload1.PostedFile.FileName.Length == 0)
        {
            Result.Text = "Please select PDF file at first!";
            return;
        }

        byte[] rtf = null;
        SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
        //this property is necessary only for registered version
        //f.Serial = "XXXXXXXXXXX";

        f.OpenPdf(pdf);
        if (f.PageCount > 0)
        {
            try
            {
                // Convert the whole PDF document to Word (RTF).
                f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf;
                // You may also set an output format to Docx.
                //f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;
                rtf = f.ToWord();
            }
            finally
            {
                // NOTE(review): ClosePdf is presumed to release the loaded
                // document; it is called even if the conversion throws.
                f.ClosePdf();
            }
        }

        // Show the Word/RTF result, or report the failure on the page.
        if (rtf != null)
        {
            ShowResult(rtf, "Result.rtf", "application/msword");
        }
        else
        {
            Result.Text = "Converting failed!";
        }
    }

    /// <summary>
    /// Replaces the current response with the given bytes, served inline.
    /// </summary>
    /// <param name="data">Document bytes to write to the response body.</param>
    /// <param name="fileName">File name placed in the content-disposition header.</param>
    /// <param name="contentType">Value for the response Content-Type.</param>
    private void ShowResult(byte[] data, string fileName, string contentType)
    {
        Response.Buffer = true;
        Response.Clear();
        Response.ContentType = contentType;
        Response.AddHeader("content-disposition", "inline; filename=\"" + fileName + "\"");
        Response.BinaryWrite(data);
        Response.Flush();
        Response.End();
    }
}
Imports System
Imports System.Data
Imports System.Configuration
Imports System.Web
Imports System.Web.Security
Imports System.Web.UI
Imports System.Web.UI.WebControls
Imports System.Web.UI.WebControls.WebParts
Imports System.Web.UI.HtmlControls
Imports System.IO

''' <summary>
''' Web Forms page that converts an uploaded PDF to RTF in memory and
''' streams the resulting document back to the browser.
''' </summary>
Partial Public Class _Default
    Inherits System.Web.UI.Page

    ''' <summary>Clears the status label on every request.</summary>
    Protected Sub Page_Load(ByVal sender As Object, ByVal e As EventArgs)
        Result.Text = ""
    End Sub

    ''' <summary>
    ''' Validates the upload, converts the PDF to RTF and sends it to the client.
    ''' Writes a message to the status label when nothing was uploaded or
    ''' when no document was produced.
    ''' </summary>
    Protected Sub Button1_Click(ByVal sender As Object, ByVal e As EventArgs)
        ' HasFile guards against a missing PostedFile, which was previously
        ' dereferenced unconditionally. The upload is read once and reused.
        Dim pdf() As Byte = If(FileUpload1.HasFile, FileUpload1.FileBytes, New Byte() {})
        If pdf.Length = 0 OrElse FileUpload1.PostedFile.FileName.Length = 0 Then
            Result.Text = "Please select PDF file at first!"
            Return
        End If

        Dim rtf() As Byte = Nothing
        Dim f As New SautinSoft.PdfFocus()
        'this property is necessary only for registered version
        'f.Serial = "XXXXXXXXXXX"

        f.OpenPdf(pdf)
        If f.PageCount > 0 Then
            Try
                ' Convert the whole PDF document to Word (RTF).
                f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf

                ' You may also set an output format to Docx.
                'f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx
                rtf = f.ToWord()
            Finally
                ' NOTE(review): ClosePdf is presumed to release the loaded
                ' document; it is called even if the conversion throws.
                f.ClosePdf()
            End Try
        End If

        ' Show the Word/RTF result, or report the failure on the page.
        If rtf IsNot Nothing Then
            ShowResult(rtf, "Result.rtf", "application/msword")
        Else
            Result.Text = "Converting failed!"
        End If
    End Sub

    ''' <summary>
    ''' Replaces the current response with the given bytes, served inline.
    ''' </summary>
    ''' <param name="data">Document bytes to write to the response body.</param>
    ''' <param name="fileName">File name placed in the content-disposition header.</param>
    ''' <param name="contentType">Value for the response Content-Type.</param>
    Private Sub ShowResult(ByVal data() As Byte, ByVal fileName As String, ByVal contentType As String)
        Response.Buffer = True
        Response.Clear()
        Response.ContentType = contentType
        Response.AddHeader("content-disposition", "inline; filename=""" & fileName & """")
        Response.BinaryWrite(data)
        Response.Flush()
        Response.End()
    End Sub
End Class

Конвертирование диапазона страниц PDF в Word

Это простое консольное приложение показывает, как преобразовать указанный диапазон страниц (от 2 до 4) из PDF в RTF.

using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: converts pages 2 through 4 of a PDF into an RTF file
    /// and opens the produced document.
    /// </summary>
    class Sample
    {
        static void Main(string[] args)
        {
            const int firstPage = 2;
            const int lastPage = 4;

            string sourcePdf = @"..\..\Potato Beetle.pdf";
            string targetRtf = Path.ChangeExtension(sourcePdf, ".rtf");

            var converter = new SautinSoft.PdfFocus();
            // Assigning Serial is only needed for the registered version.
            //converter.Serial = "XXXXXXXXXXX";

            converter.OpenPdf(sourcePdf);
            if (converter.PageCount <= 0)
            {
                // Nothing to convert.
                return;
            }

            // The output format may be either Docx or Rtf; this sample uses Rtf.
            converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf;

            // Only the selected page range is converted; a zero result is
            // treated as success, and only then is the document opened.
            if (converter.ToWord(targetRtf, firstPage, lastPage) == 0)
            {
                System.Diagnostics.Process.Start(targetRtf);
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft

Module Sample

    ''' <summary>
    ''' Console sample: converts pages 2 through 4 of a PDF into an RTF file
    ''' and opens the produced document.
    ''' </summary>
    Sub Main()
        Const firstPage As Integer = 2
        Const lastPage As Integer = 4

        Dim sourcePdf As String = "..\Potato Beetle.pdf"
        Dim targetRtf As String = Path.ChangeExtension(sourcePdf, ".rtf")

        Dim converter As New SautinSoft.PdfFocus()
        ' Assigning Serial is only needed for the registered version.
        'converter.Serial = "XXXXXXXXXXX"

        converter.OpenPdf(sourcePdf)
        If converter.PageCount <= 0 Then
            ' Nothing to convert.
            Return
        End If

        ' The output format may be either Docx or Rtf; this sample uses Rtf.
        converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf

        ' Only the selected page range is converted; a zero result is
        ' treated as success, and only then is the document opened.
        If converter.ToWord(targetRtf, firstPage, lastPage) = 0 Then
            System.Diagnostics.Process.Start(targetRtf)
        End If
    End Sub
End Module

Конвертирование каждой страницы PDF в отдельные документы DOCX

Это простое консольное приложение показывает, как преобразовать каждую страницу PDF-документа в отдельный файл DOCX с именем "{filename} - page {number}.docx".

using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: converts every page of a PDF into its own DOCX file
    /// named "{pdf name} - page {N}.docx", stored next to the PDF.
    /// </summary>
    class Sample
    {
        static void Main(string[] args)
        {
            // Path to a PDF file.
            string pdfPath = Path.GetFullPath(@"..\..\simple text.pdf");

            // Directory to store Word documents, and the shared file name prefix.
            string docxDir = Path.GetDirectoryName(pdfPath);
            string baseName = Path.GetFileNameWithoutExtension(pdfPath);

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            f.OpenPdf(pdfPath);

            // The output format is the same for every page, so set it once.
            // You may select between Docx and Rtf formats.
            f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;

            // Convert each PDF page to a separate Word document:
            // simple text - page 1.docx, simple text - page 2.docx ... simple text - page N.docx.
            for (int page = 1; page <= f.PageCount; page++)
            {
                byte[] docxBytes = f.ToWord(page, page);

                // The in-memory ToWord result is null-checked elsewhere in these
                // samples, so treat null as a failed page. File.WriteAllBytes
                // throws ArgumentNullException when given null bytes.
                if (docxBytes == null)
                {
                    Console.WriteLine("Page {0}: converting failed!", page);
                    continue;
                }

                string tempName = baseName + String.Format(" - page {0}.docx", page);
                string docxPath = Path.Combine(docxDir, tempName);
                File.WriteAllBytes(docxPath, docxBytes);

                // Open only the first and the last produced documents.
                if (page == 1 || page == f.PageCount)
                {
                    System.Diagnostics.Process.Start(docxPath);
                }
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft

Module Sample
    ''' <summary>
    ''' Console sample: converts every page of a PDF into its own DOCX file
    ''' named "{pdf name} - page {N}.docx", stored next to the PDF.
    ''' </summary>
    Sub Main()
        ' Path to a PDF file.
        Dim pdfPath As String = Path.GetFullPath("..\simple text.pdf")

        ' Directory to store Word documents, and the shared file name prefix.
        Dim docxDir As String = Path.GetDirectoryName(pdfPath)
        Dim baseName As String = Path.GetFileNameWithoutExtension(pdfPath)

        Dim f As New SautinSoft.PdfFocus()
        f.OpenPdf(pdfPath)

        ' The output format is the same for every page, so set it once.
        ' You may select between Docx and Rtf formats.
        f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx

        ' Convert each PDF page to a separate Word document:
        ' simple text - page 1.docx, simple text - page 2.docx ... simple text - page N.docx.
        For page As Integer = 1 To f.PageCount
            Dim docxBytes() As Byte = f.ToWord(page, page)

            ' The in-memory ToWord result is null-checked elsewhere in these
            ' samples, so treat Nothing as a failed page. File.WriteAllBytes
            ' throws ArgumentNullException when given no bytes.
            If docxBytes Is Nothing Then
                System.Console.WriteLine("Page {0}: converting failed!", page)
                Continue For
            End If

            Dim tempName As String = baseName & String.Format(" - page {0}.docx", page)
            Dim docxPath As String = Path.Combine(docxDir, tempName)
            File.WriteAllBytes(docxPath, docxBytes)

            ' Open only the first and the last produced documents.
            If page = 1 OrElse page = f.PageCount Then
                System.Diagnostics.Process.Start(docxPath)
            End If
        Next page
    End Sub
End Module

Конвертирование PDF-файла в Word

Это простое консольное приложение показывает, как преобразовать весь PDF-файл в формат DOCX. Вы можете выбрать формат выходного документа: DOCX или RTF.

using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: converts an entire PDF file to a DOCX file and opens the result.
    /// </summary>
    class Sample
    {
        static void Main(string[] args)
        {
            string inputPath = @"..\..\text and graphics.pdf";
            string outputPath = Path.ChangeExtension(inputPath, ".docx");

            var converter = new SautinSoft.PdfFocus();
            // Assigning Serial is only needed for the registered version.
            //converter.Serial = "XXXXXXXXXXX";

            converter.OpenPdf(inputPath);

            bool hasPages = converter.PageCount > 0;
            if (!hasPages)
            {
                return;
            }

            // Docx is used here; Rtf is the other available output format.
            converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;

            int status = converter.ToWord(outputPath);
            if (status != 0)
            {
                // A non-zero status means there is nothing to show.
                return;
            }

            System.Diagnostics.Process.Start(outputPath);
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft

Module Sample
    ''' <summary>
    ''' Console sample: converts an entire PDF file to a DOCX file and opens the result.
    ''' </summary>
    Sub Main()
        Dim inputPath As String = "..\text and graphics.pdf"
        Dim outputPath As String = Path.ChangeExtension(inputPath, ".docx")

        Dim converter As New SautinSoft.PdfFocus()
        ' Assigning Serial is only needed for the registered version.
        'converter.Serial = "XXXXXXXXXXX"

        converter.OpenPdf(inputPath)

        Dim hasPages As Boolean = converter.PageCount > 0
        If Not hasPages Then
            Return
        End If

        ' Docx is used here; Rtf is the other available output format.
        converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx

        Dim status As Integer = converter.ToWord(outputPath)
        If status <> 0 Then
            ' A non-zero status means there is nothing to show.
            Return
        End If

        System.Diagnostics.Process.Start(outputPath)
    End Sub
End Module

Конвертирование PDF в Word в памяти

Это простое консольное приложение показывает, как конвертировать PDF в DOCX (RTF) в памяти двумя способами.

  • Первый метод преобразует PDF в формат DOCX, используя массивы байтов.
  • Второй метод показывает, как конвертировать PDF в RTF, работая с MemoryStream.
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: converts a PDF to Word entirely in memory, either via
    /// byte arrays (DOCX) or via streams (RTF).
    /// </summary>
    class Sample
    {
        static void Main(string[] args)
        {
            ConvertPdfToDocxBytes();
            //ConvertPdfToRtfStream();
        }

        /// <summary>
        /// Loads a PDF as a byte array, converts it to DOCX bytes and, for
        /// demonstration only, saves and opens the result.
        /// </summary>
        private static void ConvertPdfToDocxBytes()
        {
            string pdfFile = @"..\..\simple text.pdf";

            // Assume that we already have a PDF document as array of bytes.
            byte[] pdf = File.ReadAllBytes(pdfFile);

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            //this property is necessary only for registered version
            //f.Serial = "XXXXXXXXXXX";

            f.OpenPdf(pdf);
            if (f.PageCount > 0)
            {
                // Set the format explicitly so the produced bytes match the
                // ".docx" extension used below, as the RTF method already does.
                f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;

                // Convert pdf to word in memory.
                byte[] docx = f.ToWord();

                // Save the DOCX document to a file only for demonstration purposes.
                if (docx != null)
                {
                    string wordFile = Path.ChangeExtension(pdfFile, ".docx");
                    File.WriteAllBytes(wordFile, docx);
                    System.Diagnostics.Process.Start(wordFile);
                }
            }
        }

        /// <summary>
        /// Opens a PDF as a stream, converts it to RTF into a MemoryStream and,
        /// for demonstration only, saves and opens the result.
        /// </summary>
        private static void ConvertPdfToRtfStream()
        {
            string pdfFile = @"..\..\simple text.pdf";

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            //this property is necessary only for registered version
            //f.Serial = "XXXXXXXXXXX";

            // Both streams are disposed when the conversion is finished.
            // Assume that we already have a PDF document as stream.
            using (MemoryStream rtfStream = new MemoryStream())
            using (FileStream pdfStream = new FileStream(pdfFile, FileMode.Open, FileAccess.Read))
            {
                f.OpenPdf(pdfStream);
                if (f.PageCount > 0)
                {
                    f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf;
                    int res = f.ToWord(rtfStream);

                    // Save rtfStream to a file for demonstration purposes.
                    if (res == 0)
                    {
                        string rtfFile = Path.ChangeExtension(pdfFile, ".rtf");
                        File.WriteAllBytes(rtfFile, rtfStream.ToArray());
                        System.Diagnostics.Process.Start(rtfFile);
                    }
                }
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft

Module Sample
    ''' <summary>
    ''' Console sample: converts a PDF to Word entirely in memory, either via
    ''' byte arrays (DOCX) or via streams (RTF).
    ''' </summary>
    Sub Main()
        ConvertPdfToDocxBytes()
        'ConvertPdfToRtfStream()
    End Sub

    ''' <summary>
    ''' Loads a PDF as a byte array, converts it to DOCX bytes and, for
    ''' demonstration only, saves and opens the result.
    ''' </summary>
    Private Sub ConvertPdfToDocxBytes()
        Dim pdfFile As String = "..\simple text.pdf"

        ' Assume that we already have a PDF document as array of bytes.
        Dim pdf() As Byte = File.ReadAllBytes(pdfFile)

        Dim f As New SautinSoft.PdfFocus()
        'this property is necessary only for registered version
        'f.Serial = "XXXXXXXXXXX"

        f.OpenPdf(pdf)
        If f.PageCount > 0 Then
            ' Set the format explicitly so the produced bytes match the
            ' ".docx" extension used below, as the RTF method already does.
            f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx

            ' Convert pdf to word in memory.
            Dim docx() As Byte = f.ToWord()

            ' Save the DOCX document to a file only for demonstration purposes.
            If docx IsNot Nothing Then
                Dim wordFile As String = Path.ChangeExtension(pdfFile, ".docx")
                File.WriteAllBytes(wordFile, docx)
                System.Diagnostics.Process.Start(wordFile)
            End If
        End If
    End Sub

    ''' <summary>
    ''' Opens a PDF as a stream, converts it to RTF into a MemoryStream and,
    ''' for demonstration only, saves and opens the result.
    ''' </summary>
    Private Sub ConvertPdfToRtfStream()
        Dim pdfFile As String = "..\simple text.pdf"

        Dim f As New SautinSoft.PdfFocus()
        'this property is necessary only for registered version
        'f.Serial = "XXXXXXXXXXX"

        ' Both streams are disposed when the conversion is finished.
        ' Assume that we already have a PDF document as stream.
        Using rtfStream As New MemoryStream(), pdfStream As New FileStream(pdfFile, FileMode.Open, FileAccess.Read)
            f.OpenPdf(pdfStream)
            If f.PageCount > 0 Then
                f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf
                Dim res As Integer = f.ToWord(rtfStream)

                ' Save rtfStream to a file for demonstration purposes.
                If res = 0 Then
                    Dim rtfFile As String = Path.ChangeExtension(pdfFile, ".rtf")
                    File.WriteAllBytes(rtfFile, rtfStream.ToArray())
                    System.Diagnostics.Process.Start(rtfFile)
                End If
            End If
        End Using
    End Sub
End Module

Конвертирование PDF в Word в многопоточном режиме

Это консольное приложение показывает, как конвертировать несколько PDF-документов в DOCX в многопоточном режиме. Каждое преобразование выполняется в собственном потоке.

using System;
using System.IO;
using System.Collections.Generic;
using System.Threading;
using SautinSoft;

namespace Sample
{
    /// <summary>
    /// Console sample: converts one page of every PDF in a folder to DOCX,
    /// running each conversion on its own thread.
    /// </summary>
    class Sample
    {
        static void Main(string[] args)
        {
            ConvertPdfToWordInThread();
        }

        /// <summary>Argument passed to each conversion thread.</summary>
        public class TArgument
        {
            /// <summary>Path of the PDF file to convert.</summary>
            public string PdfFile { get; set; }

            /// <summary>Number of the page to convert.</summary>
            public int PageNumber { get; set; }
        }

        /// <summary>
        /// Starts one thread per "*.pdf" file found in the folder, waits for
        /// all of them, then prints "Done." and waits for a key line.
        /// </summary>
        public static void ConvertPdfToWordInThread()
        {
            string pdfs = @"..\..\";
            string[] files = Directory.GetFiles(pdfs, "*.pdf");
            List<Thread> threads = new List<Thread>();
            for (int i = 0; i < files.Length; i++)
            {
                TArgument targ = new TArgument()
                {
                    PdfFile = files[i],
                    PageNumber = 1
                };
                var t = new Thread((a) => ConvertToWord(a));
                t.Start(targ);
                threads.Add(t);
            }

            // Block until every conversion thread has finished.
            foreach (var thread in threads)
            {
                thread.Join();
            }

            Console.WriteLine("Done.");
            Console.ReadLine();
        }

        /// <summary>
        /// Thread body: converts the requested page of one PDF to a DOCX file
        /// next to it and prints a Done/Error line for that file.
        /// </summary>
        /// <param name="targ">A <see cref="TArgument"/> boxed as object.</param>
        public static void ConvertToWord(object targ)
        {
            TArgument targum = (TArgument)targ;
            string pdfFile = targum.PdfFile;
            int page = targum.PageNumber;
            string docxFile = Path.ChangeExtension(pdfFile, ".docx");

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
            f.WordOptions.RenderMode = PdfFocus.CWordOptions.eRenderMode.Flowing;
            f.OpenPdf(pdfFile);
            bool done = false;
            if (f.PageCount > 0)
            {
                // Fall back to page 1 only when the requested page is out of
                // range. The previous ">=" test also reset the valid last page.
                if (page < 1 || page > f.PageCount)
                {
                    page = 1;
                }

                if (f.ToWord(docxFile, page, page) == 0)
                {
                    done = true;
                }

                // Runs whether or not the conversion succeeded.
                f.ClosePdf();
            }

            if (done)
            {
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
            }
            else
            {
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports System.Threading
Imports SautinSoft

Module Sample
    ''' <summary>
    ''' Console sample: converts one page of every PDF in a folder to DOCX,
    ''' running each conversion on its own thread.
    ''' </summary>
    Sub Main()
        ConvertPdfToWordInThread()
    End Sub

    ''' <summary>Argument passed to each conversion thread.</summary>
    Public Class TArgument
        ''' <summary>Path of the PDF file to convert.</summary>
        Public Property PdfFile() As String
        ''' <summary>Number of the page to convert.</summary>
        Public Property PageNumber() As Integer
    End Class

    ''' <summary>
    ''' Starts one thread per "*.pdf" file found in the folder, waits for
    ''' all of them, then prints "Done." and waits for a key line.
    ''' </summary>
    Public Sub ConvertPdfToWordInThread()
        Dim pdfs As String = "..\"
        Dim files() As String = Directory.GetFiles(pdfs, "*.pdf")
        Dim threads As New List(Of Thread)()
        For i As Integer = 0 To files.Length - 1
            Dim targ As New TArgument() With {
                .PdfFile = files(i),
                .PageNumber = 1}
            Dim t = New Thread(Sub(a) ConvertToWord(a))
            t.Start(targ)
            threads.Add(t)
        Next i

        ' Block until every conversion thread has finished.
        For Each t As Thread In threads
            t.Join()
        Next t
        Console.WriteLine("Done.")
        Console.ReadLine()
    End Sub

    ''' <summary>
    ''' Thread body: converts the requested page of one PDF to a DOCX file
    ''' next to it and prints a Done/Error line for that file.
    ''' </summary>
    ''' <param name="targ">A TArgument instance passed as Object.</param>
    Public Sub ConvertToWord(ByVal targ As Object)
        Dim targum As TArgument = DirectCast(targ, TArgument)
        Dim pdfFile As String = targum.PdfFile
        Dim page As Integer = targum.PageNumber
        Dim docxFile As String = Path.ChangeExtension(pdfFile, ".docx")
        Dim f As New SautinSoft.PdfFocus()

        f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx
        f.WordOptions.RenderMode = PdfFocus.CWordOptions.eRenderMode.Flowing
        f.OpenPdf(pdfFile)

        Dim done As Boolean = False
        If f.PageCount > 0 Then
            ' Fall back to page 1 only when the requested page is out of
            ' range. The previous ">=" test also reset the valid last page.
            If page < 1 OrElse page > f.PageCount Then
                page = 1
            End If
            If f.ToWord(docxFile, page, page) = 0 Then
                done = True
            End If
            ' Runs whether or not the conversion succeeded.
            f.ClosePdf()
        End If
        If done Then
            Console.WriteLine("{0}" & ControlChars.Tab & " - Done!", Path.GetFileName(pdfFile))
        Else
            Console.WriteLine("{0}" & ControlChars.Tab & " - Error!", Path.GetFileName(pdfFile))
        End If
    End Sub
End Module

Конвертирование PDF по URL-ссылке из Интернета в файл Word

Это консольное приложение показывает, как преобразовать удаленный PDF (URL-ссылка) в файл DOCX.

using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: opens a PDF from a remote URL, converts it to a local
    /// Word file and opens the result.
    /// </summary>
    class Sample
    {
        static void Main(string[] args)
        {
            string sourceUrl = @"https://www.sautinsoft.net/samples/simple%20text.pdf";
            string targetDocx = @"..\..\Result.docx";

            var converter = new SautinSoft.PdfFocus();
            // Assigning Serial is only needed for the registered version.
            //converter.Serial = "XXXXXXXXXXX";

            // The remote document is addressed through a Uri instance.
            converter.OpenPdf(new Uri(sourceUrl));
            if (converter.PageCount <= 0)
            {
                return;
            }

            // A zero result is treated as success; only then show the document.
            if (converter.ToWord(targetDocx) == 0)
            {
                System.Diagnostics.Process.Start(targetDocx);
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft

Module Sample
    ''' <summary>
    ''' Console sample: opens a PDF from a remote URL, converts it to a local
    ''' Word file and opens the result.
    ''' </summary>
    Sub Main()
        Dim sourceUrl As String = "https://www.sautinsoft.net/samples/simple%20text.pdf"
        Dim targetDocx As String = "..\Result.docx"

        Dim converter As New SautinSoft.PdfFocus()
        ' Assigning Serial is only needed for the registered version.
        'converter.Serial = "XXXXXXXXXXX"

        ' The remote document is addressed through a Uri instance.
        converter.OpenPdf(New Uri(sourceUrl))
        If converter.PageCount <= 0 Then
            Return
        End If

        ' A zero result is treated as success; only then show the document.
        If converter.ToWord(targetDocx) = 0 Then
            System.Diagnostics.Process.Start(targetDocx)
        End If
    End Sub
End Module

Обзор всех свойств для преобразования PDF в Word

Это консольное приложение показывает все настройки, доступные при преобразовании в форматы Word: DOCX и RTF.

using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: a short tour of the WordOptions properties used when
    /// converting a PDF to a Word document.
    /// </summary>
    class Sample
    {
        static void Main(string[] args)
        {
            string sourcePdf = @"..\..\simple text.pdf";
            string targetDocx = Path.ChangeExtension(sourcePdf, ".docx");

            var converter = new SautinSoft.PdfFocus();
            // Assigning Serial is only needed for the registered version.
            //converter.Serial = "XXXXXXXXXXX";

            converter.OpenPdf(sourcePdf);
            if (converter.PageCount <= 0)
            {
                return;
            }

            var options = converter.WordOptions;

            // Output format: Docx or Rtf.
            options.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;

            // PDF positions text by (x,y) coordinates, while Word places text
            // inside paragraphs. RenderMode selects how that gap is bridged:
            //   Flowing    - layout built without text boxes; reads as if typed
            //                by a person and is the most convenient for editing.
            //   Exact      - layout built from text boxes; the most precise and
            //                fastest mode, matching the PDF position by position.
            //   Continuous - text boxes grouped in blocks; a middle ground
            //                between Flowing and Exact.
            options.RenderMode = SautinSoft.PdfFocus.CWordOptions.eRenderMode.Flowing;

            // PDF has no table concept; tables are drawn with graphic lines.
            //   true  - analyse graphic lines to detect and recreate tables.
            //   false - leave graphic lines as they are.
            options.DetectTables = true;

            // PDF embeds fonts with their own symbol widths, but the Word
            // document uses fonts installed on the system, whose widths may differ.
            //   true  - scale symbol widths to match the PDF.
            //   false - keep the widths of the installed fonts.
            options.KeepCharScaleAndSpacing = false;

            // A PDF may hold a picture of scanned text with invisible text
            // laid over it. To get only that text and skip the picture, set
            // 'PreserveImages' to false and this property to true.
            options.ShowInvisibleText = true;

            //converter.PreserveImages = false;
            int status = converter.ToWord(targetDocx);

            // A zero status is treated as success; only then show the document.
            if (status == 0)
            {
                System.Diagnostics.Process.Start(targetDocx);
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft

Module Sample
    ''' <summary>
    ''' Console sample: a short tour of the WordOptions properties used when
    ''' converting a PDF to a Word document.
    ''' </summary>
    Sub Main()
        Dim sourcePdf As String = "..\simple text.pdf"
        Dim targetDocx As String = Path.ChangeExtension(sourcePdf, ".docx")

        Dim converter As New SautinSoft.PdfFocus()
        ' Assigning Serial is only needed for the registered version.
        'converter.Serial = "XXXXXXXXXXX"

        converter.OpenPdf(sourcePdf)
        If converter.PageCount <= 0 Then
            Return
        End If

        With converter.WordOptions
            ' Output format: Docx or Rtf.
            .Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx

            ' PDF positions text by (x,y) coordinates, while Word places text
            ' inside paragraphs. RenderMode selects how that gap is bridged:
            '   Flowing    - layout built without text boxes; reads as if typed
            '                by a person and is the most convenient for editing.
            '   Exact      - layout built from text boxes; the most precise and
            '                fastest mode, matching the PDF position by position.
            '   Continuous - text boxes grouped in blocks; a middle ground
            '                between Flowing and Exact.
            .RenderMode = SautinSoft.PdfFocus.CWordOptions.eRenderMode.Flowing

            ' PDF has no table concept; tables are drawn with graphic lines.
            '   True  - analyse graphic lines to detect and recreate tables.
            '   False - leave graphic lines as they are.
            .DetectTables = True

            ' PDF embeds fonts with their own symbol widths, but the Word
            ' document uses fonts installed on the system, whose widths may differ.
            '   True  - scale symbol widths to match the PDF.
            '   False - keep the widths of the installed fonts.
            .KeepCharScaleAndSpacing = False

            ' A PDF may hold a picture of scanned text with invisible text
            ' laid over it. To get only that text and skip the picture, set
            ' 'PreserveImages' to False and this property to True.
            .ShowInvisibleText = True
        End With

        'converter.PreserveImages = False
        Dim status As Integer = converter.ToWord(targetDocx)

        ' A zero status is treated as success; only then show the document.
        If status = 0 Then
            System.Diagnostics.Process.Start(targetDocx)
        End If
    End Sub
End Module

Установка формата вывода: DOCX или RTF

Приложение показывает, как конвертировать PDF в форматы DOCX и RTF.

using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: converts the same PDF either to DOCX or to RTF by
    /// switching the output format.
    /// </summary>
    class Sample
    {
        static void Main(string[] args)
        {
            ConvertPdfToDocx();
            //ConvertPdfToRtf();
        }

        /// <summary>Converts the sample PDF to a .docx file and opens it on success.</summary>
        private static void ConvertPdfToDocx()
        {
            string sourcePdf = @"..\..\text and graphics.pdf";
            string targetDocx = Path.ChangeExtension(sourcePdf, ".docx");

            var converter = new SautinSoft.PdfFocus();
            // Assigning Serial is only needed for the registered version.
            //converter.Serial = "XXXXXXXXXXX";

            converter.OpenPdf(sourcePdf);
            if (converter.PageCount <= 0)
            {
                return;
            }

            converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;

            // A zero result is treated as success; only then show the output.
            if (converter.ToWord(targetDocx) == 0)
            {
                System.Diagnostics.Process.Start(targetDocx);
            }
        }

        /// <summary>Converts the sample PDF to an .rtf file and opens it on success.</summary>
        private static void ConvertPdfToRtf()
        {
            string sourcePdf = @"..\..\text and graphics.pdf";
            string targetRtf = Path.ChangeExtension(sourcePdf, ".rtf");

            var converter = new SautinSoft.PdfFocus();
            // Assigning Serial is only needed for the registered version.
            //converter.Serial = "XXXXXXXXXXX";

            converter.OpenPdf(sourcePdf);
            if (converter.PageCount <= 0)
            {
                return;
            }

            converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf;

            // A zero result is treated as success; only then show the output.
            if (converter.ToWord(targetRtf) == 0)
            {
                System.Diagnostics.Process.Start(targetRtf);
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft

Module Sample
    ''' <summary>
    ''' Console sample: converts the same PDF either to DOCX or to RTF by
    ''' switching the output format.
    ''' </summary>
    Sub Main()
        ConvertPdfToDocx()
        'ConvertPdfToRtf()
    End Sub

    ''' <summary>Converts the sample PDF to a .docx file and opens it on success.</summary>
    Private Sub ConvertPdfToDocx()
        Dim sourcePdf As String = "..\text and graphics.pdf"
        Dim targetDocx As String = Path.ChangeExtension(sourcePdf, ".docx")

        Dim converter As New SautinSoft.PdfFocus()
        ' Assigning Serial is only needed for the registered version.
        'converter.Serial = "XXXXXXXXXXX"

        converter.OpenPdf(sourcePdf)
        If converter.PageCount <= 0 Then
            Return
        End If

        converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx

        ' A zero result is treated as success; only then show the output.
        If converter.ToWord(targetDocx) = 0 Then
            System.Diagnostics.Process.Start(targetDocx)
        End If
    End Sub

    ''' <summary>Converts the sample PDF to an .rtf file and opens it on success.</summary>
    Private Sub ConvertPdfToRtf()
        Dim sourcePdf As String = "..\text and graphics.pdf"
        Dim targetRtf As String = Path.ChangeExtension(sourcePdf, ".rtf")

        Dim converter As New SautinSoft.PdfFocus()
        ' Assigning Serial is only needed for the registered version.
        'converter.Serial = "XXXXXXXXXXX"

        converter.OpenPdf(sourcePdf)
        If converter.PageCount <= 0 Then
            Return
        End If

        converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf

        ' A zero result is treated as success; only then show the output.
        If converter.ToWord(targetRtf) = 0 Then
            System.Diagnostics.Process.Start(targetRtf)
        End If
    End Sub
End Module

Другие примеры кода SautinSoft.PdfFocus

✦ PDF в Word PDF в HTML PDF в Img Image из PDF PDF в Excel PDF в XML PDF в Text PDF во Всё
 ВВЕРХ