Как преобразовать каждую страницу PDF-файла в отдельный DOCX-файл на C# и .NET
Это простое консольное приложение показывает, как преобразовать каждую страницу PDF-документа в отдельный файл DOCX с именем вида "{filename} - page {number}.docx".
Полный код
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample that splits a PDF document into one Word document per page.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Converts every page of "simple text.pdf" into its own DOCX file named
        /// "{filename} - page {number}.docx" in the current directory, then opens
        /// the first and the last generated documents.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Before starting, we recommend to get a free 100-day key:
            // https://sautinsoft.com/start-for-free/
            // Apply the key here:
            // SautinSoft.PdfFocus.SetLicense("...");

            // Convert whole PDF document to separate Word documents.
            // Each PDF page will be converted to a single Word document.

            // Path to a PDF file.
            string pdfPath = Path.GetFullPath(@"..\..\..\simple text.pdf");
            // Directory to store Word documents.
            string docxDir = Directory.GetCurrentDirectory();
            // The base name is the same for every page, so compute it once.
            string baseName = Path.GetFileNameWithoutExtension(pdfPath);

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            f.OpenPdf(pdfPath);
            try
            {
                int pageCount = f.PageCount;
                if (pageCount <= 0)
                {
                    // Nothing to convert (presumably the file could not be opened or is empty).
                    Console.Error.WriteLine($"No pages to convert in \"{pdfPath}\".");
                    return;
                }

                // You may select between Docx and Rtf formats.
                // The output format does not depend on the page, so set it once.
                f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;

                // Convert each PDF page to separate Word document.
                // simple text - page 1.docx, simple text - page 2.docx ... simple text - page N.docx.
                for (int page = 1; page <= pageCount; page++)
                {
                    byte[] docxBytes = f.ToWord(page, page);
                    if (docxBytes == null)
                    {
                        // File.WriteAllBytes throws on a null array; skip the page instead of crashing.
                        // NOTE(review): assumes ToWord signals a failed conversion with null - confirm.
                        Console.Error.WriteLine($"Page {page} could not be converted.");
                        continue;
                    }

                    string docxPath = Path.Combine(docxDir, $"{baseName} - page {page}.docx");
                    File.WriteAllBytes(docxPath, docxBytes);

                    // Let's show first and last Word pages.
                    if (page == 1 || page == pageCount)
                    {
                        System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(docxPath) { UseShellExecute = true });
                    }
                }
            }
            finally
            {
                // Always release the opened PDF, even if a conversion or file write fails.
                f.ClosePdf();
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft
''' <summary>
''' Console sample that splits a PDF document into one Word document per page.
''' </summary>
Module Sample
    ''' <summary>
    ''' Converts every page of "simple text.pdf" into its own DOCX file named
    ''' "{filename} - page {number}.docx" in the current directory, then opens
    ''' the first and the last generated documents.
    ''' </summary>
    Sub Main()
        ' Before starting, we recommend to get a free 100-day key:
        ' https://sautinsoft.com/start-for-free/
        ' Apply the key here:
        ' SautinSoft.PdfFocus.SetLicense("...")

        ' Convert whole PDF document to separate Word documents.
        ' Each PDF page will be converted to a single Word document.

        ' Path to a PDF file.
        Dim pdfPath As String = Path.GetFullPath("..\..\..\simple text.pdf")
        ' Directory to store Word documents.
        Dim docxDir As String = Directory.GetCurrentDirectory()
        ' The base name is the same for every page, so compute it once.
        Dim baseName As String = Path.GetFileNameWithoutExtension(pdfPath)

        Dim f As New SautinSoft.PdfFocus()
        f.OpenPdf(pdfPath)
        Try
            Dim pageCount As Integer = f.PageCount
            If pageCount <= 0 Then
                ' Nothing to convert (presumably the file could not be opened or is empty).
                System.Console.Error.WriteLine($"No pages to convert in ""{pdfPath}"".")
                Return
            End If

            ' You may select between Docx and Rtf formats.
            ' The output format does not depend on the page, so set it once.
            f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx

            ' Convert each PDF page to separate Word document.
            ' simple text - page 1.docx, simple text - page 2.docx ... simple text - page N.docx.
            For page As Integer = 1 To pageCount
                Dim docxBytes() As Byte = f.ToWord(page, page)
                If docxBytes Is Nothing Then
                    ' File.WriteAllBytes throws on a null array; skip the page instead of crashing.
                    ' NOTE(review): assumes ToWord signals a failed conversion with Nothing - confirm.
                    System.Console.Error.WriteLine($"Page {page} could not be converted.")
                    Continue For
                End If

                Dim docxPath As String = Path.Combine(docxDir, $"{baseName} - page {page}.docx")
                File.WriteAllBytes(docxPath, docxBytes)

                ' Let's show first and last Word pages.
                If page = 1 OrElse page = pageCount Then
                    System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(docxPath) With {.UseShellExecute = True})
                End If
            Next page
        Finally
            ' Always release the opened PDF, even if a conversion or file write fails.
            f.ClosePdf()
        End Try
    End Sub
End Module
Если вам нужен пример кода или у вас есть вопрос, напишите нам по адресу support@sautinsoft.ru, спросите в онлайн-чате (правый нижний угол этой страницы) или используйте форму ниже: