This sample shows how to extract text by words from a PDF page.
Use the PdfPage.GetWords() method to extract information about all words, together with their coordinates, from a PDF page.
using System.Diagnostics;
using BitMiracle.Docotic.Pdf;

namespace BitMiracle.Docotic.Pdf.Samples
{
    /// <summary>
    /// Sample that outlines every word found on the first page of a PDF document.
    /// </summary>
    public static class ExtractTextByWords
    {
        /// <summary>
        /// Opens <c>Sample Data\form.pdf</c>, draws a rectangle at the bounds reported
        /// for each item returned by <c>PdfPage.GetWords()</c> on the first page, saves
        /// the document as <c>ExtractTextByWords.pdf</c>, and then asks the operating
        /// system to open the saved file.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // When used in trial mode, the library imposes some restrictions.
            // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
            // for more information.

            string pathToFile = "ExtractTextByWords.pdf";

            using (PdfDocument pdf = new PdfDocument(@"Sample Data\form.pdf"))
            {
                PdfPage page = pdf.Pages[0];

                // Outline each word on the page's canvas using the bounds the library reports.
                foreach (PdfTextData data in page.GetWords())
                {
                    page.Canvas.DrawRectangle(data.Bounds);
                }

                pdf.Save(pathToFile);
            }

            // pathToFile is a document, not an executable, so it has to be launched
            // through the OS shell. UseShellExecute defaults to false on .NET Core /
            // .NET 5+, where Process.Start(string) would throw for a PDF path; setting
            // it explicitly keeps the behavior the same on .NET Framework as well.
            Process.Start(new ProcessStartInfo(pathToFile) { UseShellExecute = true });
        }
    }
}
Imports System.Diagnostics
Imports BitMiracle.Docotic.Pdf

Namespace BitMiracle.Docotic.Pdf.Samples
    ''' <summary>
    ''' Sample that outlines every word found on the first page of a PDF document.
    ''' </summary>
    Public NotInheritable Class ExtractTextByWords
        ''' <summary>
        ''' Opens "Sample Data\form.pdf", draws a rectangle at the bounds reported for
        ''' each item returned by PdfPage.GetWords() on the first page, saves the
        ''' document as "ExtractTextByWords.pdf", and then asks the operating system
        ''' to open the saved file.
        ''' </summary>
        Public Shared Sub Main()
            ' NOTE:
            ' When used in trial mode, the library imposes some restrictions.
            ' Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
            ' for more information.

            Dim pathToFile As String = "ExtractTextByWords.pdf"

            Using pdf As New PdfDocument("Sample Data\form.pdf")
                Dim page As PdfPage = pdf.Pages(0)

                ' Outline each word on the page's canvas using the bounds the library reports.
                For Each data As PdfTextData In page.GetWords()
                    page.Canvas.DrawRectangle(data.Bounds)
                Next

                pdf.Save(pathToFile)
            End Using

            ' pathToFile is a document, not an executable, so it has to be launched
            ' through the OS shell. UseShellExecute defaults to False on .NET Core /
            ' .NET 5+, where Process.Start(String) would throw for a PDF path; setting
            ' it explicitly keeps the behavior the same on .NET Framework as well.
            Process.Start(New ProcessStartInfo(pathToFile) With {.UseShellExecute = True})
        End Sub
    End Class
End Namespace