Hi,
I am using the OCR module to recognize a document and then search for a variety of patterns at the OcrLine object level. I can't search at the word level because some of the patterns span multiple words.
I want to be able to search the OcrLine text and then create a bounding rectangle for each instance of the pattern I find so that I can create an annotation that covers each instance of the pattern. How can I do this?
Here is the basic code I'm using to cycle through the OcrLine objects and search for the text:
// Walk every recognized line in the text region and record a bounding
// rectangle (in lrOcrLineBounds) for each hit, counting hits in iRecogWLineCount.
foreach (OcrLine oLine in otrRegion.Lines)
{
// Search for the target at the line level.
// Two modes: if an override search string was supplied, look for that string;
// otherwise look for social-security-number-like patterns.
if (sOverrideStringToRecognize != String.Empty)
{
// Case-insensitive comparison of the WHOLE line text against the override string;
// a line that merely contains the override string as a substring does not match.
if (oLine.Text.ToUpper() == sOverrideStringToRecognize.ToUpper())
{
// NOTE(review): sWork is assigned here but not read in this branch of the visible
// code; the rectangle recorded below is the full line bounds.
sWork = System.Windows.Forms.TextRenderer.MeasureText(oLine.Text,oLine.GetFontAt(op.Resolution, iFM, iFB, 0));
lrOcrLineBounds.Add(oLine.Bounds);
iRecogWLineCount++;
}
}
else
{
// We are searching specifically for social-security-style numbers.
// The intent is to build a regular expression that matches either the full number
// or everything except the last 4 digits, depending on the BTPartiallyRedactSocial setting.
// NOTE(review): both branches below currently construct the exact same pattern,
// so the setting has no effect on what is matched here.
// Pattern, as written: 3 digits, one non-digit, optional whitespace, any run of
// characters (greedy), optional whitespace, 2 digits, optional whitespace, any single
// character, optional whitespace, 4 digits.
if (this.ocrWorkItem.BTConfigHashTable["BTPartiallyRedactSocial"].ToString() == "1")
{
re = new Regex("\\d{3}\\D\\s*.*\\s*\\d{2}\\s*.\\s*\\d{4}", RegexOptions.IgnoreCase);
}
else
{
re = new Regex("\\d{3}\\D\\s*.*\\s*\\d{2}\\s*.\\s*\\d{4}", RegexOptions.IgnoreCase);
}
// Collect every match of the pattern within this line's text.
MatchCollection reMC = re.Matches(oLine.Text);
if (reMC.Count > 0)
{
foreach (Match m in reMC)
{
// Fetch a font using the match's start index (presumably the font in effect at
// that character position - TODO confirm against the OCR library documentation).
fntWork = oLine.GetFontAt(op.Resolution,iFM, iFB, m.Index);
// Measure the rendered size of just the matched text in that font.
sWork = System.Windows.Forms.TextRenderer.MeasureText(m.Value, fntWork);
// NOTE(review): the rectangle is anchored at the line's top-left corner
// (oLine.Bounds.X / oLine.Bounds.Y) and m.Index is not used to offset X, so a
// match that does not start at the beginning of the line will be positioned
// over the start of the line rather than over the matched text.
lrOcrLineBounds.Add(new Rectangle(oLine.Bounds.X,oLine.Bounds.Y,sWork.Width,sWork.Height));
iRecogWLineCount++;
}
}
}