Click or drag to resize

PdfFocusOCROptions Property

Set the properties to attach any OCR library.

Namespace: SautinSoft
Assembly: SautinSoft.PdfFocus (in SautinSoft.PdfFocus.dll) Version: 2024.12.18
Syntax
public PdfFocusCOCROptions OCROptions { get; set; }

Property Value

PdfFocusCOCROptions
Example
Perform OCR using free Nicomsoft SDK in C#
using System.IO;
using SautinSoft;
using System;

namespace Example
{
    class Program
    {
        static void Main(string[] args)
        {
            // Before starting, we recommend to get a free 100-day key:
            // https://sautinsoft.com/start-for-free/

            // Apply the key here:
            // SautinSoft.PdfFocus.SetLicense("...");

            // Note: Please rebuild the project to restore Nuget packages.
            LoadScannedPdf();
        }

        /// <summary>
        /// Load a scanned PDF document with help of Tesseract OCR (free OCR library) and save the result as DOCX document.
        /// </summary>
        static void LoadScannedPdf()
        {
            // Here we'll load a scanned PDF document (perform OCR) containing a text on English, Russian and Vietnamese.
            // Next save the OCR result as a new DOCX document.

            // First steps:

            // 1. Download data files for English, Russian and Vietnamese languages.
            // Please download the files: eng.traineddata, rus.traineddata and vie.traineddata.
            // From here (good and fast): https://github.com/tesseract-ocr/tessdata_fast
            // or (best and slow): https://github.com/tesseract-ocr/tessdata_best

            // 2. Copy the files: eng.traineddata, rus.traineddata and vie.traineddata to
            // the folder "tessdata" in the Project root.

            // 3. Be sure that the folder "tessdata" also contains "pdf.ttf" file.

            // Let's start:
            string inpFile = Path.GetFullPath(@"..\..\..\ARGW64125SX.pdf");
            string outFile = "Result.docx";

            PdfFocus f = new PdfFocus();
            f.OCROptions.Mode = PdfFocus.COCROptions.eOCRMode.AllImages;
            f.OCROptions.OcrResourcePath = @"..\..\..\tessdata";
            f.OCROptions.OcrLanguage = "ron";

            f.OpenPdf(inpFile);
            bool result = false;
            if (f.PageCount > 0)
            {
                result = f.ToWord(outFile) == 0;
            }
            // Open the result for demonstration purposes.
            if (result)
            {
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true });
            }
            else
                Console.WriteLine("Conversion failed!");
        }
    }
}
Perform OCR using free Nicomsoft SDK in VB.Net
Imports System.IO
Imports SautinSoft
Imports System

Namespace Example
    Friend Class Sample
        Shared Sub Main(ByVal args() As String)
            ' Before starting, we recommend to get a free 100-day key:
            ' https://sautinsoft.com/start-for-free/

            ' Apply the key here:
            ' SautinSoft.PdfFocus.SetLicense("...");

            ' Note: Please rebuild the project to restore Nuget packages.
            LoadScannedPdf()
        End Sub

        ''' <summary>
        ''' Load a scanned PDF document with help of Tesseract OCR (free OCR library) and save the result as DOCX document.
        ''' </summary>
        Private Shared Sub LoadScannedPdf()
            ' Here we'll load a scanned PDF document (perform OCR) containing a text on English, Russian and Vietnamese.
            ' Next save the OCR result as a new DOCX document.

            ' First steps:

            ' 1. Download data files for English, Russian and Vietnamese languages.
            ' Please download the files: eng.traineddata, rus.traineddata and vie.traineddata.
            ' From here (good and fast): https://github.com/tesseract-ocr/tessdata_fast
            ' or (best and slow): https://github.com/tesseract-ocr/tessdata_best

            ' 2. Copy the files: eng.traineddata, rus.traineddata and vie.traineddata to
            ' the folder "tessdata" in the Project root.

            ' 3. Be sure that the folder "tessdata" also contains "pdf.ttf" file.

            ' Let's start:
            Dim inpFile As String = Path.GetFullPath("..\..\..\ARGW64125SX.pdf")
            Dim outFile As String = "Result.docx"

            Dim f As New PdfFocus()
            f.OCROptions.Mode = PdfFocus.COCROptions.eOCRMode.AllImages
            f.OCROptions.OcrResourcePath = "..\..\..\tessdata"
            f.OCROptions.OcrLanguage = "ron"

            f.OpenPdf(inpFile)
            Dim result As Boolean = False
            If f.PageCount > 0 Then
                result = f.ToWord(outFile) = 0
            End If
            ' Open the result for demonstration purposes.
            If result Then
                System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(outFile) With {.UseShellExecute = True})
            Else
                Console.WriteLine("Conversion failed!")
            End If
        End Sub
    End Class
End Namespace
See Also