using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using SautinSoft;
using NSOCR_NameSpace;
using System.Drawing.Imaging;
namespace Sample
{
/// <summary>
/// Converts a PDF document to DOCX and HTML with SautinSoft PDF Focus .Net,
/// passing the images found in the PDF to the Nicomsoft OCR engine.
/// </summary>
public class PdfConverter
{
    // Nicomsoft OCR engine wrapper; created in ConvertPdfToAllWithOCR.
    internal NSOCRLib.NSOCRClass NsOCR;
    // Handles filled in by Engine_InitializeAdvanced.
    internal int CfgObj = 0;
    internal int OcrObj = 0;
    internal int ImgObj = 0;
    // Not used by the code in this class.
    internal int ScanObj = 0;
    // Saver handle; non-zero only while PerformOCR is building its result.
    internal int SvrObj = 0;
    // Not used by the code in this class.
    internal bool OCRCreated = false;

    /// <summary>
    /// Converts the given PDF to "Result.docx" and "Result.html" using the OCR engine.
    /// Each output file is opened through the shell when its conversion returns 0.
    /// </summary>
    /// <param name="pdfPath">Path of the PDF file to convert.</param>
    public void ConvertPdfToAllWithOCR(string pdfPath)
    {
        // To perform OCR we use the free OCR library by Nicomsoft:
        // https://www.nicomsoft.com/products/ocr/download/
        // The library is freeware and can be used in commercial applications.
        // The licensed version additionally needs the key AB2A4DD5FF2A:
        //   NsOCR.Engine_SetLicenseKey("AB2A4DD5FF2A");
        NsOCR = new NSOCRLib.NSOCRClass();
        NsOCR.Engine_InitializeAdvanced(out CfgObj, out OcrObj, out ImgObj);

        SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
        f.OCROptions.Method = PerformOCR;
        f.OCROptions.Mode = PdfFocus.COCROptions.eOCRMode.AllImages;
        f.WordOptions.KeepCharScaleAndSpacing = false;

        f.OpenPdf(pdfPath);
        if (f.PageCount > 0)
        {
            try
            {
                // To Docx.
                string outFile = "Result.docx";
                f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
                if (f.ToWord(outFile) == 0)
                {
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true });
                }

                // To HTML.
                outFile = "Result.html";
                f.HtmlOptions.KeepCharScaleAndSpacing = false;
                if (f.ToHtml(outFile) == 0)
                {
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true });
                }
            }
            finally
            {
                // Release the opened document even when a conversion throws.
                f.ClosePdf();
            }
        }
        else
        {
            // f.Exception may be unset when the document simply contains no pages,
            // so do not dereference it blindly.
            string reason = f.Exception != null
                ? f.Exception.Message
                : "the PDF could not be opened or contains no pages";
            Console.WriteLine("Error: {0}!", reason);
            Console.ReadLine();
        }
    }

    /// <summary>
    /// OCR callback assigned to <c>OCROptions.Method</c>: runs OCR on one image and
    /// returns the bytes produced by a PDF-format saver.
    /// </summary>
    /// <param name="scanned">Image to recognize.</param>
    /// <returns>
    /// The bytes saved by the OCR engine, or <c>null</c> when any OCR step reports
    /// an error or an exception is thrown.
    /// </returns>
    private byte[] PerformOCR(System.Drawing.Image scanned)
    {
        try
        {
            try
            {
                NsOCR.Cfg_SetOption(CfgObj, TNSOCR.BT_DEFAULT, "Languages/English", "1");

                // The engine loads the image from memory, so serialize it as PNG first.
                Array imgArray = null;
                using (MemoryStream ms = new MemoryStream())
                {
                    scanned.Save(ms, ImageFormat.Png);
                    imgArray = ms.ToArray();
                }

                // Result codes above TNSOCR.ERROR_FIRST are treated as failures.
                int res = NsOCR.Img_LoadFromMemory(ImgObj, ref imgArray, imgArray.Length);
                if (res > TNSOCR.ERROR_FIRST)
                {
                    return null;
                }

                NsOCR.Svr_Create(CfgObj, TNSOCR.SVR_FORMAT_PDF, out SvrObj);
                NsOCR.Svr_NewDocument(SvrObj);

                res = NsOCR.Img_OCR(ImgObj, TNSOCR.OCRSTEP_FIRST, TNSOCR.OCRSTEP_LAST, TNSOCR.OCRFLAG_NONE);
                if (res > TNSOCR.ERROR_FIRST)
                {
                    return null;
                }

                res = NsOCR.Svr_AddPage(SvrObj, ImgObj, TNSOCR.FMT_EXACTCOPY);
                if (res > TNSOCR.ERROR_FIRST)
                {
                    return null;
                }

                Array outPdf = null;
                NsOCR.Svr_SaveToMemory(SvrObj, out outPdf);
                return (byte[])outPdf;
            }
            finally
            {
                // A saver is created for every image; destroy it on every exit path
                // so handles do not accumulate across the images of a document.
                if (SvrObj != 0)
                {
                    NsOCR.Svr_Destroy(SvrObj);
                    SvrObj = 0;
                }
            }
        }
        catch
        {
            // Any failure is reported to the caller as "no OCR result".
            return null;
        }
    }
}
/// <summary>
/// Console entry point of the OCR conversion sample.
/// </summary>
class Sample
{
    /// <summary>
    /// Runs the OCR conversion on the sample scan located two folders above
    /// the current directory.
    /// </summary>
    static void Main(string[] args)
    {
        // OCR is performed with the free Nicomsoft OCR library
        // (https://www.nicomsoft.com/products/ocr/download/), which is freeware
        // and may be used in commercial applications.
        //
        // If this sample does not compile and the errors mention
        //   NSOCRClass: Engine_SetLicenseKey
        //   PdfFocus: OCROptions
        // then:
        //   1. Download the Nicomsoft OCR SDK from
        //      http://www.nicomsoft.com/files/ocr/free_NSOCR_v70_build885_full.exe
        //   2. Install it on your PC or server.
        //   3. Build and launch the sample again.
        // The full manual is "How to use PDF Focus .Net with OCR" (Readme.html).
        // IMPORTANT: PDF Focus .Net supports OCR since version 7.0.
        new PdfConverter().ConvertPdfToAllWithOCR(Path.GetFullPath(@"..\..\scan.pdf"));
    }
}
}
Imports System
Imports System.Collections.Generic
Imports System.Linq
Imports System.Text
Imports System.Threading.Tasks
Imports System.IO
Imports SautinSoft
Imports NSOCR_NameSpace
Imports System.Drawing.Imaging
Namespace Sample
''' <summary>
''' Converts a PDF document to DOCX and HTML with SautinSoft PDF Focus .Net,
''' passing the images found in the PDF to the Nicomsoft OCR engine.
''' </summary>
Public Class PdfConverter
    ' Nicomsoft OCR engine wrapper; created in ConvertPdfToAllWithOCR.
    Friend NsOCR As NSOCRLib.NSOCRClass
    ' Handles filled in by Engine_InitializeAdvanced.
    Friend CfgObj As Integer = 0
    Friend OcrObj As Integer = 0
    Friend ImgObj As Integer = 0
    ' Not used by the code in this class.
    Friend ScanObj As Integer = 0
    ' Saver handle; non-zero only while PerformOCR is building its result.
    Friend SvrObj As Integer = 0
    ' Not used by the code in this class.
    Friend OCRCreated As Boolean = False

    ''' <summary>
    ''' Converts the given PDF to "Result.docx" and "Result.html" using the OCR engine.
    ''' Each output file is opened through the shell when its conversion returns 0.
    ''' </summary>
    ''' <param name="pdfPath">Path of the PDF file to convert.</param>
    Public Sub ConvertPdfToAllWithOCR(ByVal pdfPath As String)
        ' To perform OCR we use the free OCR library by Nicomsoft:
        ' https://www.nicomsoft.com/products/ocr/download/
        ' The library is freeware and can be used in commercial applications.
        ' The licensed version additionally needs the key AB2A4DD5FF2A:
        '   NsOCR.Engine_SetLicenseKey("AB2A4DD5FF2A")
        NsOCR = New NSOCRLib.NSOCRClass()
        NsOCR.Engine_InitializeAdvanced(CfgObj, OcrObj, ImgObj)

        Dim f As New SautinSoft.PdfFocus()
        f.OCROptions.Method = AddressOf PerformOCR
        f.OCROptions.Mode = PdfFocus.COCROptions.eOCRMode.AllImages
        f.WordOptions.KeepCharScaleAndSpacing = False

        f.OpenPdf(pdfPath)
        If f.PageCount > 0 Then
            Try
                ' To Docx.
                Dim outFile As String = "Result.docx"
                f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx
                If f.ToWord(outFile) = 0 Then
                    System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(outFile) With {.UseShellExecute = True})
                End If

                ' To HTML.
                outFile = "Result.html"
                f.HtmlOptions.KeepCharScaleAndSpacing = False
                If f.ToHtml(outFile) = 0 Then
                    System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(outFile) With {.UseShellExecute = True})
                End If
            Finally
                ' Release the opened document even when a conversion throws.
                f.ClosePdf()
            End Try
        Else
            ' f.Exception may be unset when the document simply contains no pages,
            ' so do not dereference it blindly.
            Dim reason As String = "the PDF could not be opened or contains no pages"
            If f.Exception IsNot Nothing Then
                reason = f.Exception.Message
            End If
            Console.WriteLine("Error: {0}!", reason)
            Console.ReadLine()
        End If
    End Sub

    ''' <summary>
    ''' OCR callback assigned to OCROptions.Method: runs OCR on one image and
    ''' returns the bytes produced by a PDF-format saver.
    ''' </summary>
    ''' <param name="scanned">Image to recognize.</param>
    ''' <returns>
    ''' The bytes saved by the OCR engine, or Nothing when any OCR step reports
    ''' an error or an exception is thrown.
    ''' </returns>
    Private Function PerformOCR(ByVal scanned As System.Drawing.Image) As Byte()
        Try
            Try
                NsOCR.Cfg_SetOption(CfgObj, TNSOCR.BT_DEFAULT, "Languages/English", "1")

                ' The engine loads the image from memory, so serialize it as PNG first.
                Dim imgArray As Array = Nothing
                Using ms As New MemoryStream()
                    scanned.Save(ms, ImageFormat.Png)
                    imgArray = ms.ToArray()
                End Using

                ' Result codes above TNSOCR.ERROR_FIRST are treated as failures.
                Dim res As Integer = NsOCR.Img_LoadFromMemory(ImgObj, imgArray, imgArray.Length)
                If res > TNSOCR.ERROR_FIRST Then
                    Return Nothing
                End If

                NsOCR.Svr_Create(CfgObj, TNSOCR.SVR_FORMAT_PDF, SvrObj)
                NsOCR.Svr_NewDocument(SvrObj)

                res = NsOCR.Img_OCR(ImgObj, TNSOCR.OCRSTEP_FIRST, TNSOCR.OCRSTEP_LAST, TNSOCR.OCRFLAG_NONE)
                If res > TNSOCR.ERROR_FIRST Then
                    Return Nothing
                End If

                res = NsOCR.Svr_AddPage(SvrObj, ImgObj, TNSOCR.FMT_EXACTCOPY)
                If res > TNSOCR.ERROR_FIRST Then
                    Return Nothing
                End If

                Dim outPdf As Array = Nothing
                NsOCR.Svr_SaveToMemory(SvrObj, outPdf)
                Return CType(outPdf, Byte())
            Finally
                ' A saver is created for every image; destroy it on every exit path
                ' so handles do not accumulate across the images of a document.
                If SvrObj <> 0 Then
                    NsOCR.Svr_Destroy(SvrObj)
                    SvrObj = 0
                End If
            End Try
        Catch
            ' Any failure is reported to the caller as "no OCR result".
            Return Nothing
        End Try
    End Function
End Class
''' <summary>
''' Console entry point of the OCR conversion sample.
''' </summary>
Friend Class Sample
    ''' <summary>
    ''' Runs the OCR conversion on the sample scan located one folder above
    ''' the current directory.
    ''' </summary>
    Shared Sub Main(ByVal args() As String)
        ' OCR is performed with the free Nicomsoft OCR library
        ' (https://www.nicomsoft.com/products/ocr/download/), which is freeware
        ' and may be used in commercial applications.
        '
        ' If this sample does not compile and the errors mention
        '   NSOCRClass: Engine_SetLicenseKey
        '   PdfFocus: OCROptions
        ' then:
        '   1. Download the Nicomsoft OCR SDK from
        '      http://www.nicomsoft.com/files/ocr/free_NSOCR_v70_build885_full.exe
        '   2. Install it on your PC or server.
        '   3. Build and launch the sample again.
        ' The full manual is "How to use PDF Focus .Net with OCR" (Readme.html).
        ' IMPORTANT: PDF Focus .Net supports OCR since version 7.0.
        Dim ocrConverter As PdfConverter = New PdfConverter()
        ocrConverter.ConvertPdfToAllWithOCR(Path.GetFullPath("..\scan.pdf"))
    End Sub
End Class
End Namespace