Extract text by words

Docotic.Pdf Library Help > Samples > Text > Extract text by words
Docotic.Pdf documentation

This sample shows how to extract text by words from a PDF page.

Use the PdfPage.GetWords() method to extract all words, together with their coordinates, from a PDF page.

C# 
using System.Diagnostics;

using BitMiracle.Docotic.Pdf;

namespace BitMiracle.Docotic.Pdf.Samples
{
    /// <summary>
    /// Sample: outlines every word found on the first page of a PDF document.
    /// </summary>
    public static class ExtractTextByWords
    {
        /// <summary>
        /// Opens <c>Sample Data\form.pdf</c>, draws a rectangle at the bounds of each
        /// word returned by <c>PdfPage.GetWords()</c> for the first page, saves the
        /// result as <c>ExtractTextByWords.pdf</c>, and then opens the saved file.
        /// </summary>
        public static void Main()
        {
            // NOTE: 
            // When used in trial mode, the library imposes some restrictions.
            // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
            // for more information.

            string pathToFile = "ExtractTextByWords.pdf";

            using (PdfDocument pdf = new PdfDocument(@"Sample Data\form.pdf"))
            {
                PdfPage page = pdf.Pages[0];

                // Each PdfTextData item carries the word's bounding rectangle;
                // draw that rectangle on the page canvas to visualize the word.
                foreach (PdfTextData data in page.GetWords())
                {
                    page.Canvas.DrawRectangle(data.Bounds);
                }

                pdf.Save(pathToFile);
            }

            // A PDF is a document, not an executable, so it has to be opened through
            // the OS shell. UseShellExecute defaults to false on .NET Core / .NET 5+,
            // where Process.Start(string) would fail for a document path; setting it
            // explicitly keeps the sample working on both .NET Framework and .NET.
            Process.Start(new ProcessStartInfo(pathToFile) { UseShellExecute = true });
        }
    }
}
Visual Basic 
Imports System.Diagnostics

Imports BitMiracle.Docotic.Pdf

Namespace BitMiracle.Docotic.Pdf.Samples
    ''' <summary>
    ''' Sample: outlines every word found on the first page of a PDF document.
    ''' </summary>
    Public NotInheritable Class ExtractTextByWords
        ''' <summary>
        ''' Opens "Sample Data\form.pdf", draws a rectangle at the bounds of each
        ''' word returned by PdfPage.GetWords() for the first page, saves the
        ''' result as "ExtractTextByWords.pdf", and then opens the saved file.
        ''' </summary>
        Public Shared Sub Main()
            ' NOTE: 
            ' When used in trial mode, the library imposes some restrictions.
            ' Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
            ' for more information.

            Dim pathToFile As String = "ExtractTextByWords.pdf"

            Using pdf As New PdfDocument("Sample Data\form.pdf")

                Dim page As PdfPage = pdf.Pages(0)

                ' Each PdfTextData item carries the word's bounding rectangle;
                ' draw that rectangle on the page canvas to visualize the word.
                For Each data As PdfTextData In page.GetWords()
                    page.Canvas.DrawRectangle(data.Bounds)
                Next

                pdf.Save(pathToFile)
            End Using

            ' A PDF is a document, not an executable, so it has to be opened through
            ' the OS shell. UseShellExecute defaults to False on .NET Core / .NET 5+,
            ' where Process.Start(String) would fail for a document path; setting it
            ' explicitly keeps the sample working on both .NET Framework and .NET.
            Process.Start(New ProcessStartInfo(pathToFile) With {.UseShellExecute = True})
        End Sub
    End Class
End Namespace