горизонтально расположенные полосы: белая, синяя, красная

Как конвертировать PDF во все форматы

Примеры использования SautinSoft.PdfFocus с оптическим распознаванием символов (OCR).

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using SautinSoft;
using NSOCR_NameSpace;
using System.Drawing.Imaging;

namespace Sample
{
    public class PdfConverter
    {
        internal NSOCRLib.NSOCRClass NsOCR;
        internal int CfgObj = 0;
        internal int OcrObj = 0;
        internal int ImgObj = 0;
        internal int ScanObj = 0;
        internal int SvrObj = 0;
        internal bool OCRCreated = false;

        /// <summary>
        /// Converts a PDF to DOCX and HTML, passing the document's images through the
        /// <c>PerformOCR</c> callback.
        /// </summary>
        /// <remarks>
        /// The results are written to "Result.docx" and "Result.html" (relative paths) and every
        /// file whose conversion returned 0 is opened through the shell.
        /// </remarks>
        /// <param name="pdfPath">Path of the PDF document to convert.</param>
        public void ConvertPdfToAllWithOCR(string pdfPath)
        {
            // To perform OCR we'll use free OCR library by Nicomsoft.
            // https://www.nicomsoft.com/products/ocr/download/
            // The library is freeware and can be used in commercial application.
            // Also you have to insert this key: AB2A4DD5FF2A.
            NsOCR = new NSOCRLib.NSOCRClass();

            // NsOCR.Engine_SetLicenseKey("AB2A4DD5FF2A"); //required for licensed version only
            NsOCR.Engine_InitializeAdvanced(out CfgObj, out OcrObj, out ImgObj);
            try
            {
                SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
                f.OCROptions.Method = PerformOCR;
                f.OCROptions.Mode = PdfFocus.COCROptions.eOCRMode.AllImages;
                f.WordOptions.KeepCharScaleAndSpacing = false;

                f.OpenPdf(pdfPath);
                if (f.PageCount > 0)
                {
                    try
                    {
                        // To Docx.
                        f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
                        OpenConvertedFile(f.ToWord("Result.docx"), "Result.docx");

                        // To HTML.
                        f.HtmlOptions.KeepCharScaleAndSpacing = false;
                        OpenConvertedFile(f.ToHtml("Result.html"), "Result.html");
                    }
                    finally
                    {
                        // Release the document even if a conversion or the viewer launch throws.
                        f.ClosePdf();
                    }
                }
                else
                {
                    // PageCount can be 0 without an exception having been recorded,
                    // so do not dereference f.Exception blindly.
                    string reason = f.Exception != null
                        ? f.Exception.Message
                        : "the document could not be opened or contains no pages";
                    Console.WriteLine("Error: {0}!", reason);
                    Console.ReadLine();
                }
            }
            finally
            {
                // The engine was initialized above; shut it down once all conversions are done.
                NsOCR.Engine_Uninitialize();
            }
        }

        /// <summary>
        /// Opens <paramref name="outFile"/> with its associated application when
        /// <paramref name="resultCode"/> is 0; otherwise reports the non-zero result code.
        /// </summary>
        private static void OpenConvertedFile(int resultCode, string outFile)
        {
            if (resultCode == 0)
            {
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true });
            }
            else
            {
                Console.WriteLine("Conversion to \"{0}\" did not succeed (result code {1}).", outFile, resultCode);
            }
        }
        /// <summary>
        /// OCR callback assigned to <c>OCROptions.Method</c>: encodes the image as PNG, runs it
        /// through the NSOCR engine and returns the document produced by the PDF saver.
        /// </summary>
        /// <param name="scanned">Image to recognize.</param>
        /// <returns>
        /// The bytes returned by <c>Svr_SaveToMemory</c>, or <c>null</c> when any NSOCR step
        /// reports an error code or an exception is thrown.
        /// </returns>
        private byte[] PerformOCR(System.Drawing.Image scanned)
        {
            try
            {
                bool saverCreated = false;
                try
                {
                    NsOCR.Cfg_SetOption(CfgObj, TNSOCR.BT_DEFAULT, "Languages/English", "1");

                    // The COM call takes the buffer by reference as System.Array.
                    Array imgArray;
                    using (MemoryStream ms = new MemoryStream())
                    {
                        scanned.Save(ms, ImageFormat.Png);
                        imgArray = ms.ToArray();
                    }

                    // Result codes above ERROR_FIRST are treated as failures.
                    int res = NsOCR.Img_LoadFromMemory(ImgObj, ref imgArray, imgArray.Length);
                    if (res > TNSOCR.ERROR_FIRST)
                    {
                        return null;
                    }

                    NsOCR.Svr_Create(CfgObj, TNSOCR.SVR_FORMAT_PDF, out SvrObj);
                    saverCreated = true;
                    NsOCR.Svr_NewDocument(SvrObj);

                    res = NsOCR.Img_OCR(ImgObj, TNSOCR.OCRSTEP_FIRST, TNSOCR.OCRSTEP_LAST, TNSOCR.OCRFLAG_NONE);
                    if (res > TNSOCR.ERROR_FIRST)
                    {
                        return null;
                    }

                    res = NsOCR.Svr_AddPage(SvrObj, ImgObj, TNSOCR.FMT_EXACTCOPY);
                    if (res > TNSOCR.ERROR_FIRST)
                    {
                        return null;
                    }

                    Array outPdf = null;
                    NsOCR.Svr_SaveToMemory(SvrObj, out outPdf);

                    return (byte[])outPdf;
                }
                finally
                {
                    // A new saver is created on every call; release it so the callback
                    // does not leak one saver object per processed image.
                    if (saverCreated)
                    {
                        NsOCR.Svr_Destroy(SvrObj);
                        SvrObj = 0;
                    }
                }
            }
            catch
            {
                // Best effort: this callback never throws; any failure yields null.
                return null;
            }
        }
    }
    class Sample
    {
        /// <summary>
        /// Entry point: runs the OCR conversion on "..\..\scan.pdf", resolved to a full path.
        /// </summary>
        static void Main(string[] args)
        {
            // OCR is done with the free OCR library by Nicomsoft:
            // https://www.nicomsoft.com/products/ocr/download/
            // The library is freeware and can be used in commercial application.
            PdfConverter ocrConverter = new PdfConverter();
            ocrConverter.ConvertPdfToAllWithOCR(Path.GetFullPath(@"..\..\scan.pdf"));

            // If compiling this sample reports errors about
            //   NSOCRClass: Engine_SetLicenseKey
            //   PdfFocus: OCROptions
            // then:
            // 1. Download Nicomsoft OCR SDK from: http://www.nicomsoft.com/files/ocr/free_NSOCR_v70_build885_full.exe
            // 2. Install it on your PC or server-side.
            // 3. Launch the code sample again.
            //
            // See the full manual - How to use PDF Focus .Net with OCR (Readme.html).
            // IMPORTANT: PDF Focus .Net supports OCR since version 7.0
        }
    }
}
Imports System
Imports System.Collections.Generic
Imports System.Linq
Imports System.Text
Imports System.Threading.Tasks
Imports System.IO
Imports SautinSoft
Imports NSOCR_NameSpace
Imports System.Drawing.Imaging

Namespace Sample
    Public Class PdfConverter
        Friend NsOCR As NSOCRLib.NSOCRClass
        Friend CfgObj As Integer = 0
        Friend OcrObj As Integer = 0
        Friend ImgObj As Integer = 0
        Friend ScanObj As Integer = 0
        Friend SvrObj As Integer = 0
        Friend OCRCreated As Boolean = False

        ''' <summary>
        ''' Converts a PDF to DOCX and HTML, passing the document's images through the
        ''' PerformOCR callback.
        ''' </summary>
        ''' <remarks>
        ''' The results are written to "Result.docx" and "Result.html" (relative paths) and every
        ''' file whose conversion returned 0 is opened through the shell.
        ''' </remarks>
        ''' <param name="pdfPath">Path of the PDF document to convert.</param>
        Public Sub ConvertPdfToAllWithOCR(ByVal pdfPath As String)
            ' To perform OCR we'll use free OCR library by Nicomsoft.
            ' https://www.nicomsoft.com/products/ocr/download/
            ' The library is freeware and can be used in commercial application.
            ' Also you have to insert this key: AB2A4DD5FF2A.
            NsOCR = New NSOCRLib.NSOCRClass()

            'NsOCR.Engine_SetLicenseKey("AB2A4DD5FF2A") 'required for licensed version only
            NsOCR.Engine_InitializeAdvanced(CfgObj, OcrObj, ImgObj)
            Try
                Dim f As New SautinSoft.PdfFocus()
                f.OCROptions.Method = AddressOf PerformOCR
                f.OCROptions.Mode = PdfFocus.COCROptions.eOCRMode.AllImages
                f.WordOptions.KeepCharScaleAndSpacing = False

                f.OpenPdf(pdfPath)
                If f.PageCount > 0 Then
                    Try
                        ' To Docx.
                        f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx
                        OpenConvertedFile(f.ToWord("Result.docx"), "Result.docx")

                        ' To HTML.
                        f.HtmlOptions.KeepCharScaleAndSpacing = False
                        OpenConvertedFile(f.ToHtml("Result.html"), "Result.html")
                    Finally
                        ' Release the document even if a conversion or the viewer launch throws.
                        f.ClosePdf()
                    End Try
                Else
                    ' PageCount can be 0 without an exception having been recorded,
                    ' so do not dereference f.Exception blindly.
                    Dim reason As String = If(f.Exception IsNot Nothing,
                                              f.Exception.Message,
                                              "the document could not be opened or contains no pages")
                    Console.WriteLine("Error: {0}!", reason)
                    Console.ReadLine()
                End If
            Finally
                ' The engine was initialized above; shut it down once all conversions are done.
                NsOCR.Engine_Uninitialize()
            End Try
        End Sub

        ''' <summary>
        ''' Opens the file with its associated application when resultCode is 0;
        ''' otherwise reports the non-zero result code.
        ''' </summary>
        Private Shared Sub OpenConvertedFile(ByVal resultCode As Integer, ByVal outFile As String)
            If resultCode = 0 Then
                System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(outFile) With {.UseShellExecute = True})
            Else
                Console.WriteLine("Conversion to ""{0}"" did not succeed (result code {1}).", outFile, resultCode)
            End If
        End Sub
        ''' <summary>
        ''' OCR callback assigned to OCROptions.Method: encodes the image as PNG, runs it
        ''' through the NSOCR engine and returns the document produced by the PDF saver.
        ''' </summary>
        ''' <param name="scanned">Image to recognize.</param>
        ''' <returns>
        ''' The bytes returned by Svr_SaveToMemory, or Nothing when any NSOCR step
        ''' reports an error code or an exception is thrown.
        ''' </returns>
        Private Function PerformOCR(ByVal scanned As System.Drawing.Image) As Byte()
            Try
                Dim saverCreated As Boolean = False
                Try
                    NsOCR.Cfg_SetOption(CfgObj, TNSOCR.BT_DEFAULT, "Languages/English", "1")

                    ' The COM call takes the buffer as System.Array.
                    Dim imgArray As Array = Nothing
                    Using ms As New MemoryStream()
                        scanned.Save(ms, ImageFormat.Png)
                        imgArray = ms.ToArray()
                    End Using

                    ' Result codes above ERROR_FIRST are treated as failures.
                    Dim res As Integer = NsOCR.Img_LoadFromMemory(ImgObj, imgArray, imgArray.Length)
                    If res > TNSOCR.ERROR_FIRST Then
                        Return Nothing
                    End If

                    NsOCR.Svr_Create(CfgObj, TNSOCR.SVR_FORMAT_PDF, SvrObj)
                    saverCreated = True
                    NsOCR.Svr_NewDocument(SvrObj)

                    res = NsOCR.Img_OCR(ImgObj, TNSOCR.OCRSTEP_FIRST, TNSOCR.OCRSTEP_LAST, TNSOCR.OCRFLAG_NONE)
                    If res > TNSOCR.ERROR_FIRST Then
                        Return Nothing
                    End If

                    res = NsOCR.Svr_AddPage(SvrObj, ImgObj, TNSOCR.FMT_EXACTCOPY)
                    If res > TNSOCR.ERROR_FIRST Then
                        Return Nothing
                    End If

                    Dim outPdf As Array = Nothing
                    NsOCR.Svr_SaveToMemory(SvrObj, outPdf)

                    Return CType(outPdf, Byte())
                Finally
                    ' A new saver is created on every call; release it so the callback
                    ' does not leak one saver object per processed image.
                    If saverCreated Then
                        NsOCR.Svr_Destroy(SvrObj)
                        SvrObj = 0
                    End If
                End Try
            Catch
                ' Best effort: this callback never throws; any failure yields Nothing.
                Return Nothing
            End Try
        End Function
    End Class
    Friend Class Sample
        ''' <summary>
        ''' Entry point: runs the OCR conversion on "..\scan.pdf", resolved to a full path.
        ''' </summary>
        Shared Sub Main(ByVal args() As String)
            ' OCR is done with the free OCR library by Nicomsoft:
            ' https://www.nicomsoft.com/products/ocr/download/
            ' The library is freeware and can be used in commercial application.
            Dim ocrConverter As New PdfConverter()
            ocrConverter.ConvertPdfToAllWithOCR(Path.GetFullPath("..\scan.pdf"))

            ' If compiling this sample reports errors about
            '   NSOCRClass: Engine_SetLicenseKey
            '   PdfFocus: OCROptions
            ' then:
            ' 1. Download Nicomsoft OCR SDK from: http://www.nicomsoft.com/files/ocr/free_NSOCR_v70_build885_full.exe
            ' 2. Install it on your PC or server-side.
            ' 3. Launch the code sample again.
            '
            ' See the full manual - How to use PDF Focus .Net with OCR (Readme.html).
            ' IMPORTANT: PDF Focus .Net supports OCR since version 7.0
        End Sub
    End Class
End Namespace

Конвертируйте PDF во все форматы в .NET Core с помощью C# и VB.NET

Примеры использования SautinSoft.PdfFocus для преобразования PDF в DOCX, RTF, HTML, XML, Excel (XLS), PNG, многостраничный TIFF и текстовые форматы в .NET Core.

using System;
using System.IO;
using SautinSoft;
using System.DrawingCore.Imaging;

namespace Sample
{
    class Program
    {
        static void Main(string[] args)
        {
            ConvertPdfToAll();
        }

        /// <summary>
        /// Converts PDF to DOCX, RTF, HTML, XML, Excel (XLS), PNG, Text.
        /// </summary>
        /// <remarks>
        /// Each output is written next to the source PDF (same name, different extension) and
        /// is opened through the shell when its conversion returns 0.
        /// </remarks>
        public static void ConvertPdfToAll()
        {
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            string pdfFile = @"..\..\..\simple text.pdf";

            f.OpenPdf(pdfFile);
            if (f.PageCount <= 0)
            {
                // PageCount can be 0 without an exception having been recorded,
                // so do not dereference f.Exception blindly.
                string reason = f.Exception != null
                    ? f.Exception.Message
                    : "the document could not be opened or contains no pages";
                Console.WriteLine("Error: {0}!", reason);
                Console.ReadLine();
                return;
            }

            try
            {
                // To Docx.
                string outFile = Path.ChangeExtension(pdfFile, ".docx");
                f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
                OpenConvertedFile(f.ToWord(outFile), outFile);

                // To Rtf.
                outFile = Path.ChangeExtension(pdfFile, ".rtf");
                f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Rtf;
                OpenConvertedFile(f.ToWord(outFile), outFile);

                // To Excel.
                outFile = Path.ChangeExtension(pdfFile, ".xls");
                f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = true;
                OpenConvertedFile(f.ToExcel(outFile), outFile);

                // To HTML.
                outFile = Path.ChangeExtension(pdfFile, ".html");
                OpenConvertedFile(f.ToHtml(outFile), outFile);

                // To XML.
                outFile = Path.ChangeExtension(pdfFile, ".xml");
                f.XmlOptions.ConvertNonTabularDataToSpreadsheet = true;
                OpenConvertedFile(f.ToXml(outFile), outFile);

                // To Image (first page only).
                outFile = Path.ChangeExtension(pdfFile, ".png");
                f.ImageOptions.Dpi = 300;
                f.ImageOptions.ImageFormat = ImageFormat.Png;
                OpenConvertedFile(f.ToImage(outFile, 1), outFile);

                // To Text.
                outFile = Path.ChangeExtension(pdfFile, ".txt");
                OpenConvertedFile(f.ToText(outFile), outFile);
            }
            finally
            {
                // Release the document even if a conversion or the viewer launch throws.
                f.ClosePdf();
            }
        }

        /// <summary>
        /// Opens <paramref name="outFile"/> with its associated application when
        /// <paramref name="resultCode"/> is 0; otherwise reports the non-zero result code.
        /// </summary>
        private static void OpenConvertedFile(int resultCode, string outFile)
        {
            if (resultCode == 0)
            {
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true });
            }
            else
            {
                Console.WriteLine("Conversion to \"{0}\" did not succeed (result code {1}).", outFile, resultCode);
            }
        }
    }
}
Imports System.IO
Imports SautinSoft
Imports System.DrawingCore.Imaging

Module Sample
    Sub Main()
        ConvertPdfToAll()
    End Sub

    ''' <summary>
    ''' Converts PDF to DOCX, RTF, HTML, XML, Excel (XLS), PNG, Text.
    ''' </summary>
    ''' <remarks>
    ''' Each output is written next to the source PDF (same name, different extension) and
    ''' is opened through the shell when its conversion returns 0.
    ''' </remarks>
    Public Sub ConvertPdfToAll()
        Dim f As New SautinSoft.PdfFocus()

        Dim pdfFile As String = "..\..\..\simple text.pdf"

        f.OpenPdf(pdfFile)
        If f.PageCount <= 0 Then
            ' PageCount can be 0 without an exception having been recorded,
            ' so do not dereference f.Exception blindly.
            Dim reason As String = If(f.Exception IsNot Nothing,
                                      f.Exception.Message,
                                      "the document could not be opened or contains no pages")
            Console.WriteLine("Error: {0}!", reason)
            Console.ReadLine()
            Return
        End If

        Try
            ' To Docx.
            Dim outFile As String = Path.ChangeExtension(pdfFile, ".docx")
            f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx
            OpenConvertedFile(f.ToWord(outFile), outFile)

            ' To Rtf.
            outFile = Path.ChangeExtension(pdfFile, ".rtf")
            f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Rtf
            OpenConvertedFile(f.ToWord(outFile), outFile)

            ' To Excel.
            outFile = Path.ChangeExtension(pdfFile, ".xls")
            f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = True
            OpenConvertedFile(f.ToExcel(outFile), outFile)

            ' To HTML.
            outFile = Path.ChangeExtension(pdfFile, ".html")
            OpenConvertedFile(f.ToHtml(outFile), outFile)

            ' To XML.
            outFile = Path.ChangeExtension(pdfFile, ".xml")
            f.XmlOptions.ConvertNonTabularDataToSpreadsheet = True
            OpenConvertedFile(f.ToXml(outFile), outFile)

            ' To Image (first page only).
            outFile = Path.ChangeExtension(pdfFile, ".png")
            f.ImageOptions.Dpi = 300
            f.ImageOptions.ImageFormat = ImageFormat.Png
            OpenConvertedFile(f.ToImage(outFile, 1), outFile)

            ' To Text.
            outFile = Path.ChangeExtension(pdfFile, ".txt")
            OpenConvertedFile(f.ToText(outFile), outFile)
        Finally
            ' Release the document even if a conversion or the viewer launch throws.
            f.ClosePdf()
        End Try
    End Sub

    ''' <summary>
    ''' Opens the file with its associated application when resultCode is 0;
    ''' otherwise reports the non-zero result code.
    ''' </summary>
    Private Sub OpenConvertedFile(ByVal resultCode As Integer, ByVal outFile As String)
        If resultCode = 0 Then
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(outFile) With {.UseShellExecute = True})
        Else
            Console.WriteLine("Conversion to ""{0}"" did not succeed (result code {1}).", outFile, resultCode)
        End If
    End Sub
End Module

Конвертируйте PDF во все форматы в .NET Framework с помощью C# и VB.NET

Пример использования SautinSoft.PdfFocus для преобразования PDF в DOCX, RTF, HTML, XML, Excel (XLS), PNG, многостраничный TIFF и текстовые форматы в .NET Framework.

using System;
using System.IO;
using SautinSoft;

namespace Sample
{
    class Sample
    {
        static void Main(string[] args)
        {
            ConvertPdfToAll();
        }

        /// <summary>
        /// Converts PDF to DOCX, RTF, HTML, XML, Excel (XLS), PNG, Multipage TIFF, Text.
        /// </summary>
        /// <remarks>
        /// Each output is written next to the source PDF (same name, different extension) and
        /// is opened through the shell when its conversion returns 0.
        /// </remarks>
        public static void ConvertPdfToAll()
        {
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            string pdfFile = @"..\..\simple text.pdf";

            f.OpenPdf(pdfFile);
            if (f.PageCount <= 0)
            {
                // PageCount can be 0 without an exception having been recorded,
                // so do not dereference f.Exception blindly.
                string reason = f.Exception != null
                    ? f.Exception.Message
                    : "the document could not be opened or contains no pages";
                Console.WriteLine("Error: {0}!", reason);
                Console.ReadLine();
                return;
            }

            try
            {
                // To Docx.
                string outFile = Path.ChangeExtension(pdfFile, ".docx");
                f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
                OpenConvertedFile(f.ToWord(outFile), outFile);

                // To Rtf.
                outFile = Path.ChangeExtension(pdfFile, ".rtf");
                f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Rtf;
                OpenConvertedFile(f.ToWord(outFile), outFile);

                // To Excel.
                outFile = Path.ChangeExtension(pdfFile, ".xls");
                f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = true;
                OpenConvertedFile(f.ToExcel(outFile), outFile);

                // To HTML.
                outFile = Path.ChangeExtension(pdfFile, ".html");
                OpenConvertedFile(f.ToHtml(outFile), outFile);

                // To XML.
                outFile = Path.ChangeExtension(pdfFile, ".xml");
                f.XmlOptions.ConvertNonTabularDataToSpreadsheet = true;
                OpenConvertedFile(f.ToXml(outFile), outFile);

                // To Image (first page only).
                outFile = Path.ChangeExtension(pdfFile, ".png");
                f.ImageOptions.Dpi = 300;
                f.ImageOptions.ImageFormat = System.Drawing.Imaging.ImageFormat.Png;
                OpenConvertedFile(f.ToImage(outFile, 1), outFile);

                // To Multipage Tiff (Black & White).
                outFile = Path.ChangeExtension(pdfFile, ".tiff");
                f.ImageOptions.ColorDepth = PdfFocus.CImageOptions.eColorDepth.BlackWhite1bpp;
                OpenConvertedFile(f.ToMultipageTiff(outFile, System.Drawing.Imaging.EncoderValue.CompressionCCITT4), outFile);

                // To Text.
                outFile = Path.ChangeExtension(pdfFile, ".txt");
                OpenConvertedFile(f.ToText(outFile), outFile);
            }
            finally
            {
                // Release the document even if a conversion or the viewer launch throws.
                f.ClosePdf();
            }
        }

        /// <summary>
        /// Opens <paramref name="outFile"/> with its associated application when
        /// <paramref name="resultCode"/> is 0; otherwise reports the non-zero result code.
        /// </summary>
        private static void OpenConvertedFile(int resultCode, string outFile)
        {
            if (resultCode == 0)
            {
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true });
            }
            else
            {
                Console.WriteLine("Conversion to \"{0}\" did not succeed (result code {1}).", outFile, resultCode);
            }
        }
    }
}
Imports System.IO
Imports SautinSoft

Module Sample
    Sub Main()
        ConvertPdfToAll()
    End Sub

    ''' <summary>
    ''' Converts PDF to DOCX, RTF, HTML, XML, Excel (XLS), PNG, Multipage TIFF, Text.
    ''' </summary>
    ''' <remarks>
    ''' Each output is written next to the source PDF (same name, different extension) and
    ''' is opened through the shell when its conversion returns 0.
    ''' </remarks>
    Public Sub ConvertPdfToAll()
        Dim f As New SautinSoft.PdfFocus()

        Dim pdfFile As String = "..\simple text.pdf"

        f.OpenPdf(pdfFile)
        If f.PageCount <= 0 Then
            ' PageCount can be 0 without an exception having been recorded,
            ' so do not dereference f.Exception blindly.
            Dim reason As String = If(f.Exception IsNot Nothing,
                                      f.Exception.Message,
                                      "the document could not be opened or contains no pages")
            Console.WriteLine("Error: {0}!", reason)
            Console.ReadLine()
            Return
        End If

        Try
            ' To Docx.
            Dim outFile As String = Path.ChangeExtension(pdfFile, ".docx")
            f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx
            OpenConvertedFile(f.ToWord(outFile), outFile)

            ' To Rtf.
            outFile = Path.ChangeExtension(pdfFile, ".rtf")
            f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Rtf
            OpenConvertedFile(f.ToWord(outFile), outFile)

            ' To Excel.
            outFile = Path.ChangeExtension(pdfFile, ".xls")
            f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = True
            OpenConvertedFile(f.ToExcel(outFile), outFile)

            ' To HTML.
            outFile = Path.ChangeExtension(pdfFile, ".html")
            OpenConvertedFile(f.ToHtml(outFile), outFile)

            ' To XML.
            outFile = Path.ChangeExtension(pdfFile, ".xml")
            f.XmlOptions.ConvertNonTabularDataToSpreadsheet = True
            OpenConvertedFile(f.ToXml(outFile), outFile)

            ' To Image (first page only).
            outFile = Path.ChangeExtension(pdfFile, ".png")
            f.ImageOptions.Dpi = 300
            f.ImageOptions.ImageFormat = System.Drawing.Imaging.ImageFormat.Png
            OpenConvertedFile(f.ToImage(outFile, 1), outFile)

            ' To Multipage Tiff (Black & White).
            outFile = Path.ChangeExtension(pdfFile, ".tiff")
            f.ImageOptions.ColorDepth = PdfFocus.CImageOptions.eColorDepth.BlackWhite1bpp
            OpenConvertedFile(f.ToMultipageTiff(outFile, System.Drawing.Imaging.EncoderValue.CompressionCCITT4), outFile)

            ' To Text.
            outFile = Path.ChangeExtension(pdfFile, ".txt")
            OpenConvertedFile(f.ToText(outFile), outFile)
        Finally
            ' Release the document even if a conversion or the viewer launch throws.
            f.ClosePdf()
        End Try
    End Sub

    ''' <summary>
    ''' Opens the file with its associated application when resultCode is 0;
    ''' otherwise reports the non-zero result code.
    ''' </summary>
    Private Sub OpenConvertedFile(ByVal resultCode As Integer, ByVal outFile As String)
        If resultCode = 0 Then
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(outFile) With {.UseShellExecute = True})
        Else
            Console.WriteLine("Conversion to ""{0}"" did not succeed (result code {1}).", outFile, resultCode)
        End If
    End Sub
End Module

Другие примеры кода SautinSoft.PdfFocus

PDF в Word | PDF в HTML | PDF в Img | Image из PDF | PDF в Excel | PDF в XML | PDF в Text | ✦ PDF во Всё
 ВВЕРХ