Как конвертировать PDF в Word на C# и VB.NET
ASP.NET - Экспорт PDF в Word
Простой пример ASP.NET-приложения для экспорта PDF-документа в формат RTF. Преобразование выполняется в памяти, а полученный RTF-документ отображается в браузере клиента как встроенный объект.
using System;
/// <summary>
/// Web Forms page that converts an uploaded PDF to RTF in memory and sends
/// the result to the browser as an inline document.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>Resets the status label on every request.</summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        Result.Text = "";
    }

    /// <summary>
    /// Handles the convert button: checks that a file was uploaded, converts it
    /// to RTF and either streams the document or shows a failure message.
    /// </summary>
    protected void Button1_Click(object sender, EventArgs e)
    {
        // HasFile avoids dereferencing PostedFile when nothing was uploaded.
        if (!FileUpload1.HasFile || FileUpload1.FileName.Length == 0)
        {
            Result.Text = "Please select PDF file at first!";
            return;
        }

        // Read the uploaded content once and reuse the same array.
        byte[] pdf = FileUpload1.FileBytes;
        if (pdf.Length == 0)
        {
            Result.Text = "Please select PDF file at first!";
            return;
        }

        byte[] rtf = null;
        SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
        // The serial number is required only for the registered version:
        //f.Serial = "XXXXXXXXXXX";
        f.OpenPdf(pdf);

        if (f.PageCount > 0)
        {
            try
            {
                // Convert the whole PDF document to Word (RTF).
                f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf;
                // The output format may also be set to Docx:
                //f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;
                rtf = f.ToWord();
            }
            finally
            {
                // Close the document even when the conversion throws.
                f.ClosePdf();
            }
        }

        if (rtf != null)
        {
            ShowResult(rtf, "Result.rtf", "application/msword");
        }
        else
        {
            Result.Text = "Converting failed!";
        }
    }

    /// <summary>
    /// Replaces the current response with the given bytes, served inline.
    /// </summary>
    /// <param name="data">Document content to send.</param>
    /// <param name="fileName">File name placed in the content-disposition header.</param>
    /// <param name="contentType">MIME type of the response.</param>
    private void ShowResult(byte[] data, string fileName, string contentType)
    {
        Response.Buffer = true;
        Response.Clear();
        Response.ContentType = contentType;
        Response.AddHeader("content-disposition", "inline; filename=\"" + fileName + "\"");
        Response.BinaryWrite(data);
        Response.Flush();
        Response.End();
    }
}
Imports System
Imports System.Data
Imports System.Configuration
Imports System.Web
Imports System.Web.Security
Imports System.Web.UI
Imports System.Web.UI.WebControls
Imports System.Web.UI.WebControls.WebParts
Imports System.Web.UI.HtmlControls
Imports System.IO
''' <summary>
''' Web Forms page that converts an uploaded PDF to RTF in memory and sends
''' the result to the browser as an inline document.
''' </summary>
Partial Public Class _Default
    Inherits System.Web.UI.Page

    ''' <summary>Resets the status label on every request.</summary>
    Protected Sub Page_Load(ByVal sender As Object, ByVal e As EventArgs)
        Result.Text = ""
    End Sub

    ''' <summary>
    ''' Handles the convert button: checks that a file was uploaded, converts it
    ''' to RTF and either streams the document or shows a failure message.
    ''' </summary>
    Protected Sub Button1_Click(ByVal sender As Object, ByVal e As EventArgs)
        ' HasFile avoids dereferencing PostedFile when nothing was uploaded.
        If Not FileUpload1.HasFile OrElse FileUpload1.FileName.Length = 0 Then
            Result.Text = "Please select PDF file at first!"
            Return
        End If

        ' Read the uploaded content once and reuse the same array.
        Dim pdf() As Byte = FileUpload1.FileBytes
        If pdf.Length = 0 Then
            Result.Text = "Please select PDF file at first!"
            Return
        End If

        Dim rtf() As Byte = Nothing
        Dim f As New SautinSoft.PdfFocus()
        ' The serial number is required only for the registered version:
        'f.Serial = "XXXXXXXXXXX"
        f.OpenPdf(pdf)

        If f.PageCount > 0 Then
            Try
                ' Convert the whole PDF document to Word (RTF).
                f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf
                ' The output format may also be set to Docx:
                'f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx
                rtf = f.ToWord()
            Finally
                ' Close the document even when the conversion throws.
                f.ClosePdf()
            End Try
        End If

        If rtf IsNot Nothing Then
            ShowResult(rtf, "Result.rtf", "application/msword")
        Else
            Result.Text = "Converting failed!"
        End If
    End Sub

    ''' <summary>
    ''' Replaces the current response with the given bytes, served inline.
    ''' </summary>
    ''' <param name="data">Document content to send.</param>
    ''' <param name="fileName">File name placed in the content-disposition header.</param>
    ''' <param name="contentType">MIME type of the response.</param>
    Private Sub ShowResult(ByVal data() As Byte, ByVal fileName As String, ByVal contentType As String)
        Response.Buffer = True
        Response.Clear()
        Response.ContentType = contentType
        Response.AddHeader("content-disposition", "inline; filename=""" & fileName & """")
        Response.BinaryWrite(data)
        Response.Flush()
        Response.End()
    End Sub
End Class
Конвертирование диапазона страниц PDF в Word
Это простое консольное приложение показывает, как преобразовать указанный диапазон страниц (от 2 до 4) из PDF в RTF.
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: converts pages 2 through 4 of a PDF file to an RTF file.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Opens the sample PDF, converts the page range to RTF next to it and
        /// launches the produced file when the conversion returns 0.
        /// </summary>
        static void Main(string[] args)
        {
            string sourcePdf = @"..\..\Potato Beetle.pdf";
            string targetRtf = Path.ChangeExtension(sourcePdf, ".rtf");

            SautinSoft.PdfFocus converter = new SautinSoft.PdfFocus();
            // The serial number is required only for the registered version:
            //converter.Serial = "XXXXXXXXXXX";
            converter.OpenPdf(sourcePdf);

            // Stop when the opened document reports no pages.
            if (converter.PageCount <= 0)
            {
                return;
            }

            // The output format may be Docx or Rtf.
            converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf;

            // Convert only pages 2 - 4 and open the file when ToWord returns 0.
            if (converter.ToWord(targetRtf, 2, 4) == 0)
            {
                System.Diagnostics.Process.Start(targetRtf);
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft
''' <summary>
''' Console sample: converts pages 2 through 4 of a PDF file to an RTF file.
''' </summary>
Module Sample
    ''' <summary>
    ''' Opens the sample PDF, converts the page range to RTF next to it and
    ''' launches the produced file when the conversion returns 0.
    ''' </summary>
    Sub Main()
        Dim sourcePdf As String = "..\Potato Beetle.pdf"
        Dim targetRtf As String = Path.ChangeExtension(sourcePdf, ".rtf")

        Dim converter As New SautinSoft.PdfFocus()
        ' The serial number is required only for the registered version:
        'converter.Serial = "XXXXXXXXXXX"
        converter.OpenPdf(sourcePdf)

        ' Stop when the opened document reports no pages.
        If converter.PageCount <= 0 Then
            Return
        End If

        ' The output format may be Docx or Rtf.
        converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf

        ' Convert only pages 2 - 4 and open the file when ToWord returns 0.
        If converter.ToWord(targetRtf, 2, 4) = 0 Then
            System.Diagnostics.Process.Start(targetRtf)
        End If
    End Sub
End Module
Конвертирование каждой страницы PDF в отдельные документы DOCX
Это простое консольное приложение показывает, как преобразовать каждую страницу PDF-документа в отдельный файл DOCX с именем "{filename} - page {number}.docx".
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: converts every page of a PDF into its own DOCX file.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Writes "simple text - page 1.docx" ... "simple text - page N.docx"
        /// next to the source PDF and opens the first and the last of them.
        /// </summary>
        static void Main(string[] args)
        {
            // Path to the PDF file.
            string pdfPath = Path.GetFullPath(@"..\..\simple text.pdf");
            // Directory that receives the Word documents.
            string docxDir = Path.GetDirectoryName(pdfPath);
            string baseName = Path.GetFileNameWithoutExtension(pdfPath);

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            f.OpenPdf(pdfPath);

            int pageCount = f.PageCount;
            if (pageCount <= 0)
            {
                return;
            }

            // The format does not change between pages, so set it once.
            // Docx and Rtf are both available.
            f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;

            try
            {
                for (int page = 1; page <= pageCount; page++)
                {
                    byte[] docxBytes = f.ToWord(page, page);

                    // File.WriteAllBytes rejects a null buffer, so report the page and move on.
                    if (docxBytes == null)
                    {
                        Console.WriteLine("Page {0} - conversion failed!", page);
                        continue;
                    }

                    string tempName = baseName + String.Format(" - page {0}.docx", page);
                    string docxPath = Path.Combine(docxDir, tempName);
                    File.WriteAllBytes(docxPath, docxBytes);

                    // Show only the first and the last Word documents.
                    if (page == 1 || page == pageCount)
                    {
                        System.Diagnostics.Process.Start(docxPath);
                    }
                }
            }
            finally
            {
                // Close the document even when a page conversion throws.
                f.ClosePdf();
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft
''' <summary>
''' Console sample: converts every page of a PDF into its own DOCX file.
''' </summary>
Module Sample
    ''' <summary>
    ''' Writes "simple text - page 1.docx" ... "simple text - page N.docx"
    ''' next to the source PDF and opens the first and the last of them.
    ''' </summary>
    Sub Main()
        ' Path to the PDF file.
        Dim pdfPath As String = Path.GetFullPath("..\simple text.pdf")
        ' Directory that receives the Word documents.
        Dim docxDir As String = Path.GetDirectoryName(pdfPath)
        Dim baseName As String = Path.GetFileNameWithoutExtension(pdfPath)

        Dim f As New SautinSoft.PdfFocus()
        f.OpenPdf(pdfPath)

        Dim pageCount As Integer = f.PageCount
        If pageCount <= 0 Then
            Return
        End If

        ' The format does not change between pages, so set it once.
        ' Docx and Rtf are both available.
        f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx

        Try
            For page As Integer = 1 To pageCount
                Dim docxBytes() As Byte = f.ToWord(page, page)

                ' File.WriteAllBytes rejects a null buffer, so report the page and move on.
                If docxBytes Is Nothing Then
                    System.Console.WriteLine("Page {0} - conversion failed!", page)
                    Continue For
                End If

                Dim tempName As String = baseName & String.Format(" - page {0}.docx", page)
                Dim docxPath As String = Path.Combine(docxDir, tempName)
                File.WriteAllBytes(docxPath, docxBytes)

                ' Show only the first and the last Word documents.
                If page = 1 OrElse page = pageCount Then
                    System.Diagnostics.Process.Start(docxPath)
                End If
            Next page
        Finally
            ' Close the document even when a page conversion throws.
            f.ClosePdf()
        End Try
    End Sub
End Module
Конвертирование PDF-файла в Word
Это простое консольное приложение показывает, как преобразовать весь PDF-файл в формат DOCX. Вы можете выбрать формат выходного документа: DOCX или RTF.
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: converts a whole PDF file to a DOCX file.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Converts the sample PDF to a DOCX file with the same base name and
        /// launches it when the conversion returns 0.
        /// </summary>
        static void Main(string[] args)
        {
            string sourcePdf = @"..\..\text and graphics.pdf";
            string targetDocx = Path.ChangeExtension(sourcePdf, ".docx");

            SautinSoft.PdfFocus converter = new SautinSoft.PdfFocus();
            // The serial number is required only for the registered version:
            //converter.Serial = "XXXXXXXXXXX";
            converter.OpenPdf(sourcePdf);

            // Stop when the opened document reports no pages.
            if (converter.PageCount <= 0)
            {
                return;
            }

            // Docx and Rtf are both available as output formats.
            converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;

            // Open the resulting Word document when ToWord returns 0.
            if (converter.ToWord(targetDocx) == 0)
            {
                System.Diagnostics.Process.Start(targetDocx);
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft
''' <summary>
''' Console sample: converts a whole PDF file to a DOCX file.
''' </summary>
Module Sample
    ''' <summary>
    ''' Converts the sample PDF to a DOCX file with the same base name and
    ''' launches it when the conversion returns 0.
    ''' </summary>
    Sub Main()
        Dim sourcePdf As String = "..\text and graphics.pdf"
        Dim targetDocx As String = Path.ChangeExtension(sourcePdf, ".docx")

        Dim converter As New SautinSoft.PdfFocus()
        ' The serial number is required only for the registered version:
        'converter.Serial = "XXXXXXXXXXX"
        converter.OpenPdf(sourcePdf)

        ' Stop when the opened document reports no pages.
        If converter.PageCount <= 0 Then
            Return
        End If

        ' Docx and Rtf are both available as output formats.
        converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx

        ' Open the resulting Word document when ToWord returns 0.
        If converter.ToWord(targetDocx) = 0 Then
            System.Diagnostics.Process.Start(targetDocx)
        End If
    End Sub
End Module
Конвертирование PDF в Word в памяти
Это простое консольное приложение показывает, как конвертировать PDF в DOCX (RTF) в памяти двумя способами.
- Первый метод преобразует PDF в формат DOCX, используя массивы байтов.
- Второй метод показывает, как конвертировать PDF в RTF, работая с MemoryStream.
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: converts PDF to Word in memory, once through byte
    /// arrays (DOCX) and once through streams (RTF).
    /// </summary>
    class Sample
    {
        /// <summary>Runs the byte-array variant; the stream variant is left commented out.</summary>
        static void Main(string[] args)
        {
            ConvertPdfToDocxBytes();
            //ConvertPdfToRtfStream();
        }

        /// <summary>
        /// Converts a PDF held in a byte array to DOCX bytes, then saves and
        /// opens the result for demonstration purposes.
        /// </summary>
        private static void ConvertPdfToDocxBytes()
        {
            string pdfFile = @"..\..\simple text.pdf";
            // Assume that the PDF document is already available as an array of bytes.
            byte[] pdf = File.ReadAllBytes(pdfFile);

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            // The serial number is required only for the registered version:
            //f.Serial = "XXXXXXXXXXX";
            f.OpenPdf(pdf);

            if (f.PageCount <= 0)
            {
                return;
            }

            // The result is saved with a .docx extension, so request Docx
            // explicitly instead of relying on the converter's default.
            f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;

            // Convert PDF to Word in memory.
            byte[] docx = f.ToWord();
            if (docx != null)
            {
                // Save the DOCX document to a file only for demonstration purposes.
                string wordFile = Path.ChangeExtension(pdfFile, ".docx");
                File.WriteAllBytes(wordFile, docx);
                System.Diagnostics.Process.Start(wordFile);
            }
        }

        /// <summary>
        /// Converts a PDF read from a stream to RTF written into a
        /// MemoryStream, then saves and opens the result for demonstration purposes.
        /// </summary>
        private static void ConvertPdfToRtfStream()
        {
            string pdfFile = @"..\..\simple text.pdf";

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            // The serial number is required only for the registered version:
            //f.Serial = "XXXXXXXXXXX";

            // Both streams are disposed when the conversion is finished.
            using (MemoryStream rtfStream = new MemoryStream())
            // Assume that the PDF document is already available as a stream.
            using (FileStream pdfStream = new FileStream(pdfFile, FileMode.Open, FileAccess.Read))
            {
                f.OpenPdf(pdfStream);
                if (f.PageCount <= 0)
                {
                    return;
                }

                f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf;
                int res = f.ToWord(rtfStream);

                // Save rtfStream to a file only for demonstration purposes.
                if (res == 0)
                {
                    string rtfFile = Path.ChangeExtension(pdfFile, ".rtf");
                    File.WriteAllBytes(rtfFile, rtfStream.ToArray());
                    System.Diagnostics.Process.Start(rtfFile);
                }
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft
''' <summary>
''' Console sample: converts PDF to Word in memory, once through byte
''' arrays (DOCX) and once through streams (RTF).
''' </summary>
Module Sample
    ''' <summary>Runs the byte-array variant; the stream variant is left commented out.</summary>
    Sub Main()
        ConvertPdfToDocxBytes()
        'ConvertPdfToRtfStream()
    End Sub

    ''' <summary>
    ''' Converts a PDF held in a byte array to DOCX bytes, then saves and
    ''' opens the result for demonstration purposes.
    ''' </summary>
    Private Sub ConvertPdfToDocxBytes()
        Dim pdfFile As String = "..\simple text.pdf"
        ' Assume that the PDF document is already available as an array of bytes.
        Dim pdf() As Byte = File.ReadAllBytes(pdfFile)

        Dim f As New SautinSoft.PdfFocus()
        ' The serial number is required only for the registered version:
        'f.Serial = "XXXXXXXXXXX"
        f.OpenPdf(pdf)

        If f.PageCount <= 0 Then
            Return
        End If

        ' The result is saved with a .docx extension, so request Docx
        ' explicitly instead of relying on the converter's default.
        f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx

        ' Convert PDF to Word in memory.
        Dim docx() As Byte = f.ToWord()
        If docx IsNot Nothing Then
            ' Save the DOCX document to a file only for demonstration purposes.
            Dim wordFile As String = Path.ChangeExtension(pdfFile, ".docx")
            File.WriteAllBytes(wordFile, docx)
            System.Diagnostics.Process.Start(wordFile)
        End If
    End Sub

    ''' <summary>
    ''' Converts a PDF read from a stream to RTF written into a
    ''' MemoryStream, then saves and opens the result for demonstration purposes.
    ''' </summary>
    Private Sub ConvertPdfToRtfStream()
        Dim pdfFile As String = "..\simple text.pdf"

        Dim f As New SautinSoft.PdfFocus()
        ' The serial number is required only for the registered version:
        'f.Serial = "XXXXXXXXXXX"

        ' Both streams are disposed when the conversion is finished.
        Using rtfStream As New MemoryStream()
            ' Assume that the PDF document is already available as a stream.
            Using pdfStream As New FileStream(pdfFile, FileMode.Open, FileAccess.Read)
                f.OpenPdf(pdfStream)
                If f.PageCount > 0 Then
                    f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf
                    Dim res As Integer = f.ToWord(rtfStream)

                    ' Save rtfStream to a file only for demonstration purposes.
                    If res = 0 Then
                        Dim rtfFile As String = Path.ChangeExtension(pdfFile, ".rtf")
                        File.WriteAllBytes(rtfFile, rtfStream.ToArray())
                        System.Diagnostics.Process.Start(rtfFile)
                    End If
                End If
            End Using
        End Using
    End Sub
End Module
Конвертирование PDF в Word в многопоточном режиме
Это консольное приложение показывает, как конвертировать несколько PDF-документов в DOCX в многопоточном режиме. Каждое преобразование выполняется в собственном потоке (в примере конвертируется только первая страница каждого документа).
using System;
using System.IO;
using System.Collections.Generic;
using System.Threading;
using SautinSoft;
namespace Sample
{
    /// <summary>
    /// Console sample: converts every PDF in a folder to DOCX, one thread per file.
    /// </summary>
    class Sample
    {
        static void Main(string[] args)
        {
            ConvertPdfToWordInThread();
        }

        /// <summary>Argument passed to each worker thread.</summary>
        public class TArgument
        {
            /// <summary>Path of the PDF file to convert.</summary>
            public string PdfFile { get; set; }

            /// <summary>1-based number of the single page to convert.</summary>
            public int PageNumber { get; set; }
        }

        /// <summary>
        /// Starts one thread per "*.pdf" file found in the sample folder, waits
        /// for all of them, then waits for a key press.
        /// </summary>
        public static void ConvertPdfToWordInThread()
        {
            string pdfs = @"..\..\";
            string[] files = Directory.GetFiles(pdfs, "*.pdf");
            List<Thread> threads = new List<Thread>(files.Length);

            foreach (string file in files)
            {
                TArgument targ = new TArgument()
                {
                    PdfFile = file,
                    PageNumber = 1
                };

                Thread t = new Thread(ConvertToWord);
                t.Start(targ);
                threads.Add(t);
            }

            foreach (Thread thread in threads)
            {
                thread.Join();
            }

            Console.WriteLine("Done.");
            Console.ReadLine();
        }

        /// <summary>
        /// Thread entry point: converts one page of one PDF to a DOCX file with
        /// the same base name and prints the outcome.
        /// </summary>
        /// <param name="targ">A <see cref="TArgument"/> instance.</param>
        public static void ConvertToWord(object targ)
        {
            TArgument targum = (TArgument)targ;
            string pdfFile = targum.PdfFile;
            int page = targum.PageNumber;
            string docxFile = Path.ChangeExtension(pdfFile, ".docx");

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
            f.WordOptions.RenderMode = PdfFocus.CWordOptions.eRenderMode.Flowing;
            f.OpenPdf(pdfFile);

            bool done = false;
            if (f.PageCount > 0)
            {
                try
                {
                    // Fall back to the first page only when the requested page is
                    // outside 1..PageCount; the last page itself is a valid request.
                    if (page < 1 || page > f.PageCount)
                    {
                        page = 1;
                    }

                    done = f.ToWord(docxFile, page, page) == 0;
                }
                finally
                {
                    // Close the document even when the conversion throws.
                    f.ClosePdf();
                }
            }

            if (done)
            {
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
            }
            else
            {
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports System.Threading
Imports SautinSoft
''' <summary>
''' Console sample: converts every PDF in a folder to DOCX, one thread per file.
''' </summary>
Module Sample
    Sub Main()
        ConvertPdfToWordInThread()
    End Sub

    ''' <summary>Argument passed to each worker thread.</summary>
    Public Class TArgument
        ''' <summary>Path of the PDF file to convert.</summary>
        Public Property PdfFile() As String

        ''' <summary>1-based number of the single page to convert.</summary>
        Public Property PageNumber() As Integer
    End Class

    ''' <summary>
    ''' Starts one thread per "*.pdf" file found in the sample folder, waits
    ''' for all of them, then waits for a key press.
    ''' </summary>
    Public Sub ConvertPdfToWordInThread()
        Dim pdfs As String = "..\"
        Dim files() As String = Directory.GetFiles(pdfs, "*.pdf")
        Dim threads As New List(Of Thread)(files.Length)

        For Each file As String In files
            Dim targ As New TArgument() With {
                .PdfFile = file,
                .PageNumber = 1}

            Dim t = New Thread(Sub(a) ConvertToWord(a))
            t.Start(targ)
            threads.Add(t)
        Next file

        For Each worker As Thread In threads
            worker.Join()
        Next worker

        Console.WriteLine("Done.")
        Console.ReadLine()
    End Sub

    ''' <summary>
    ''' Thread entry point: converts one page of one PDF to a DOCX file with
    ''' the same base name and prints the outcome.
    ''' </summary>
    ''' <param name="targ">A <see cref="TArgument"/> instance.</param>
    Public Sub ConvertToWord(ByVal targ As Object)
        Dim targum As TArgument = DirectCast(targ, TArgument)
        Dim pdfFile As String = targum.PdfFile
        Dim page As Integer = targum.PageNumber
        Dim docxFile As String = Path.ChangeExtension(pdfFile, ".docx")

        Dim f As New SautinSoft.PdfFocus()
        f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx
        f.WordOptions.RenderMode = PdfFocus.CWordOptions.eRenderMode.Flowing
        f.OpenPdf(pdfFile)

        Dim done As Boolean = False
        If f.PageCount > 0 Then
            Try
                ' Fall back to the first page only when the requested page is
                ' outside 1..PageCount; the last page itself is a valid request.
                If page < 1 OrElse page > f.PageCount Then
                    page = 1
                End If

                done = (f.ToWord(docxFile, page, page) = 0)
            Finally
                ' Close the document even when the conversion throws.
                f.ClosePdf()
            End Try
        End If

        If done Then
            Console.WriteLine("{0}" & ControlChars.Tab & " - Done!", Path.GetFileName(pdfFile))
        Else
            Console.WriteLine("{0}" & ControlChars.Tab & " - Error!", Path.GetFileName(pdfFile))
        End If
    End Sub
End Module
Конвертирование URL PDF из Интернета в файл Word
Это консольное приложение показывает, как преобразовать удаленный PDF (URL-ссылка) в файл DOCX.
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: converts a remote PDF (addressed by URL) to a DOCX file.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Opens the PDF from its URL, converts it to "Result.docx" and launches
        /// the file when the conversion returns 0.
        /// </summary>
        static void Main(string[] args)
        {
            string remotePdfUrl = @"https://www.sautinsoft.net/samples/simple%20text.pdf";
            string pathToWord = @"..\..\Result.docx";

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            // The serial number is required only for the registered version:
            //f.Serial = "XXXXXXXXXXX";

            Uri uri = new Uri(remotePdfUrl);
            f.OpenPdf(uri);

            if (f.PageCount <= 0)
            {
                return;
            }

            // The output path ends in .docx, so request Docx explicitly
            // instead of relying on the converter's default format.
            f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;

            int result = f.ToWord(pathToWord);

            // Show the resulting Word document.
            if (result == 0)
            {
                System.Diagnostics.Process.Start(pathToWord);
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft
''' <summary>
''' Console sample: converts a remote PDF (addressed by URL) to a DOCX file.
''' </summary>
Module Sample
    ''' <summary>
    ''' Opens the PDF from its URL, converts it to "Result.docx" and launches
    ''' the file when the conversion returns 0.
    ''' </summary>
    Sub Main()
        Dim remotePdfUrl As String = "https://www.sautinsoft.net/samples/simple%20text.pdf"
        Dim pathToWord As String = "..\Result.docx"

        Dim f As New SautinSoft.PdfFocus()
        ' The serial number is required only for the registered version:
        'f.Serial = "XXXXXXXXXXX"

        Dim uri As New Uri(remotePdfUrl)
        f.OpenPdf(uri)

        If f.PageCount <= 0 Then
            Return
        End If

        ' The output path ends in .docx, so request Docx explicitly
        ' instead of relying on the converter's default format.
        f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx

        Dim result As Integer = f.ToWord(pathToWord)

        ' Show the resulting Word document.
        If result = 0 Then
            System.Diagnostics.Process.Start(pathToWord)
        End If
    End Sub
End Module
Обзор всех свойств для преобразования PDF в Word
Это консольное приложение демонстрирует все настройки, доступные при преобразовании в форматы Word: DOCX и RTF.
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: a short tour of the WordOptions properties used when
    /// converting PDF to DOCX or RTF.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Configures each WordOptions property in turn, converts the sample PDF
        /// to DOCX and launches the file when the conversion returns 0.
        /// </summary>
        static void Main(string[] args)
        {
            string sourcePdf = @"..\..\simple text.pdf";
            string targetDocx = Path.ChangeExtension(sourcePdf, ".docx");

            SautinSoft.PdfFocus converter = new SautinSoft.PdfFocus();
            // The serial number is required only for the registered version:
            //converter.Serial = "XXXXXXXXXXX";
            converter.OpenPdf(sourcePdf);

            // Stop when the opened document reports no pages.
            if (converter.PageCount <= 0)
            {
                return;
            }

            // Output format: Docx or Rtf.
            converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;

            // Render mode. PDF positions text by (x, y) coordinates, whereas Word
            // keeps text inside paragraphs:
            //   Flowing    - the most useful mode for editing; the result looks as if
            //                typed by a human, and the layout uses no text boxes.
            //   Exact      - the most precise and fastest mode; the layout is built
            //                from text boxes and reproduces the PDF positions.
            //   Continuous - text boxes grouped in blocks; a middle ground between
            //                Flowing and Exact.
            converter.WordOptions.RenderMode = SautinSoft.PdfFocus.CWordOptions.eRenderMode.Flowing;

            // Table detection. PDF has no table concept; tables are drawn with
            // graphic lines:
            //   true  - parse the graphic lines to detect and recreate tables.
            //   false - leave the graphic lines as they are.
            converter.WordOptions.DetectTables = true;

            // Character scaling. Fonts embedded in the PDF carry their own symbol
            // widths, which may differ from the fonts installed on this system:
            //   true  - scale symbol widths to match the PDF.
            //   false - keep the widths of the installed fonts.
            converter.WordOptions.KeepCharScaleAndSpacing = false;

            // Invisible text. A PDF may hold a scanned picture with invisible text
            // laid over it. To extract only that text and skip the picture, set
            // this property to true and set 'PreserveImages' to false:
            converter.WordOptions.ShowInvisibleText = true;
            //converter.PreserveImages = false;

            // Open the resulting Word document when ToWord returns 0.
            if (converter.ToWord(targetDocx) == 0)
            {
                System.Diagnostics.Process.Start(targetDocx);
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft
''' <summary>
''' Console sample: a short tour of the WordOptions properties used when
''' converting PDF to DOCX or RTF.
''' </summary>
Module Sample
    ''' <summary>
    ''' Configures each WordOptions property in turn, converts the sample PDF
    ''' to DOCX and launches the file when the conversion returns 0.
    ''' </summary>
    Sub Main()
        Dim sourcePdf As String = "..\simple text.pdf"
        Dim targetDocx As String = Path.ChangeExtension(sourcePdf, ".docx")

        Dim converter As New SautinSoft.PdfFocus()
        ' The serial number is required only for the registered version:
        'converter.Serial = "XXXXXXXXXXX"
        converter.OpenPdf(sourcePdf)

        ' Stop when the opened document reports no pages.
        If converter.PageCount <= 0 Then
            Return
        End If

        ' Output format: Docx or Rtf.
        converter.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx

        ' Render mode. PDF positions text by (x, y) coordinates, whereas Word
        ' keeps text inside paragraphs:
        '   Flowing    - the most useful mode for editing; the result looks as if
        '                typed by a human, and the layout uses no text boxes.
        '   Exact      - the most precise and fastest mode; the layout is built
        '                from text boxes and reproduces the PDF positions.
        '   Continuous - text boxes grouped in blocks; a middle ground between
        '                Flowing and Exact.
        converter.WordOptions.RenderMode = SautinSoft.PdfFocus.CWordOptions.eRenderMode.Flowing

        ' Table detection. PDF has no table concept; tables are drawn with
        ' graphic lines:
        '   True  - parse the graphic lines to detect and recreate tables.
        '   False - leave the graphic lines as they are.
        converter.WordOptions.DetectTables = True

        ' Character scaling. Fonts embedded in the PDF carry their own symbol
        ' widths, which may differ from the fonts installed on this system:
        '   True  - scale symbol widths to match the PDF.
        '   False - keep the widths of the installed fonts.
        converter.WordOptions.KeepCharScaleAndSpacing = False

        ' Invisible text. A PDF may hold a scanned picture with invisible text
        ' laid over it. To extract only that text and skip the picture, set
        ' this property to True and set 'PreserveImages' to False:
        converter.WordOptions.ShowInvisibleText = True
        'converter.PreserveImages = False

        ' Open the resulting Word document when ToWord returns 0.
        If converter.ToWord(targetDocx) = 0 Then
            System.Diagnostics.Process.Start(targetDocx)
        End If
    End Sub
End Module
Установка формата вывода: DOCX или RTF
Приложение показывает, как конвертировать PDF в форматы DOCX и RTF.
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: converts the same PDF to DOCX or to RTF by switching
    /// the output format.
    /// </summary>
    class Sample
    {
        /// <summary>Runs the DOCX variant; the RTF variant is left commented out.</summary>
        static void Main(string[] args)
        {
            ConvertPdfToDocx();
            //ConvertPdfToRtf();
        }

        /// <summary>Converts the sample PDF to a .docx file and opens it.</summary>
        private static void ConvertPdfToDocx()
        {
            ConvertSample(".docx", SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx);
        }

        /// <summary>Converts the sample PDF to an .rtf file and opens it.</summary>
        private static void ConvertPdfToRtf()
        {
            ConvertSample(".rtf", SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf);
        }

        /// <summary>
        /// Shared conversion routine: writes the sample PDF next to itself with
        /// the given extension and format, and opens it when ToWord returns 0.
        /// </summary>
        /// <param name="extension">Extension of the output file, including the dot.</param>
        /// <param name="format">Word output format to request from the converter.</param>
        private static void ConvertSample(string extension, SautinSoft.PdfFocus.CWordOptions.eWordDocument format)
        {
            string sourcePdf = @"..\..\text and graphics.pdf";
            string targetFile = Path.ChangeExtension(sourcePdf, extension);

            SautinSoft.PdfFocus converter = new SautinSoft.PdfFocus();
            // The serial number is required only for the registered version:
            //converter.Serial = "XXXXXXXXXXX";
            converter.OpenPdf(sourcePdf);

            // Stop when the opened document reports no pages.
            if (converter.PageCount <= 0)
            {
                return;
            }

            converter.WordOptions.Format = format;

            // Show the produced result.
            if (converter.ToWord(targetFile) == 0)
            {
                System.Diagnostics.Process.Start(targetFile);
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft
''' <summary>
''' Console sample: converts the same PDF to DOCX or to RTF by switching
''' the output format.
''' </summary>
Module Sample
    ''' <summary>Runs the DOCX variant; the RTF variant is left commented out.</summary>
    Sub Main()
        ConvertPdfToDocx()
        'ConvertPdfToRtf()
    End Sub

    ''' <summary>Converts the sample PDF to a .docx file and opens it.</summary>
    Private Sub ConvertPdfToDocx()
        ConvertSample(".docx", SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx)
    End Sub

    ''' <summary>Converts the sample PDF to an .rtf file and opens it.</summary>
    Private Sub ConvertPdfToRtf()
        ConvertSample(".rtf", SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf)
    End Sub

    ''' <summary>
    ''' Shared conversion routine: writes the sample PDF next to itself with
    ''' the given extension and format, and opens it when ToWord returns 0.
    ''' </summary>
    ''' <param name="extension">Extension of the output file, including the dot.</param>
    ''' <param name="format">Word output format to request from the converter.</param>
    Private Sub ConvertSample(ByVal extension As String, ByVal format As SautinSoft.PdfFocus.CWordOptions.eWordDocument)
        Dim sourcePdf As String = "..\text and graphics.pdf"
        Dim targetFile As String = Path.ChangeExtension(sourcePdf, extension)

        Dim converter As New SautinSoft.PdfFocus()
        ' The serial number is required only for the registered version:
        'converter.Serial = "XXXXXXXXXXX"
        converter.OpenPdf(sourcePdf)

        ' Stop when the opened document reports no pages.
        If converter.PageCount <= 0 Then
            Return
        End If

        converter.WordOptions.Format = format

        ' Show the produced result.
        If converter.ToWord(targetFile) = 0 Then
            System.Diagnostics.Process.Start(targetFile)
        End If
    End Sub
End Module