Skip to content

Commit 95f1753

Browse files
committed
Further improvements to OCR block sorting
1 parent 5fe5f27 commit 95f1753

File tree

1 file changed

+203
-32
lines changed

1 file changed

+203
-32
lines changed

Telegram/AI/RecognizedTextSelectionManager.cs

Lines changed: 203 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -233,86 +233,257 @@ private static List<RecognizedTextBlock> SortReadingOrder(IEnumerable<Recognized
233233
{
234234
// Returns the share (0..1) of the smaller box's extent that the two boxes have in common
// on the axis used to tell reading lines apart: Y for upright text, X for sideways text.
// Returns 0 when the smaller extent is not positive.
static float VerticalOverlap(BoxInfo a, BoxInfo b)
{
    // Only the 45..135 degree band counts as sideways. Text rotated close to 180 degrees
    // is ordered by Y in GetPrimarySortKey, so it has to be grouped on the Y axis here as
    // well; without the upper bound such boxes were grouped on X but sorted on Y.
    float absA = Math.Abs(a.Rotation);
    float absB = Math.Abs(b.Rotation);
    bool sideways = (absA > 45f && absA < 135f) || (absB > 45f && absB < 135f);

    float shared;
    float smallerExtent;

    if (sideways)
    {
        // Sideways text: the X axis plays the role of the "vertical" axis.
        shared = Math.Max(0, Math.Min(a.MaxX, b.MaxX) - Math.Max(a.MinX, b.MinX));
        smallerExtent = Math.Min(a.MaxX - a.MinX, b.MaxX - b.MinX);
    }
    else
    {
        shared = Math.Max(0, Math.Min(a.MaxY, b.MaxY) - Math.Max(a.MinY, b.MinY));
        smallerExtent = Math.Min(a.MaxY - a.MinY, b.MaxY - b.MinY);
    }

    // Guard against dividing by zero for degenerate (zero-size) boxes.
    return smallerExtent > 0 ? shared / smallerExtent : 0f;
}
240249

241250
// Returns the share (0..1) of the narrower span that a box has in common with the combined
// span of an existing column. Upright text is compared on the X axis; when the box or any
// column member is sideways, the Y axis is used instead.
// Returns 0 when the narrower span is not positive.
static float HorizontalOverlap(List<BoxInfo> column, BoxInfo box)
{
    // Sideways means strictly between 45 and 135 degrees in either direction, the same band
    // GetSortingKey and DetectAlignment use. Without the upper bound, text rotated close to
    // 180 degrees was matched on Y here but sorted on X by GetSortingKey.
    static bool IsSideways(BoxInfo candidate)
    {
        float magnitude = Math.Abs(candidate.Rotation);
        return magnitude > 45f && magnitude < 135f;
    }

    bool sideways = IsSideways(box) || column.Any(b => IsSideways(b));

    float columnStart, columnEnd, boxStart, boxEnd;

    if (sideways)
    {
        columnStart = column.Min(b => b.MinY);
        columnEnd = column.Max(b => b.MaxY);
        boxStart = box.MinY;
        boxEnd = box.MaxY;
    }
    else
    {
        columnStart = column.Min(b => b.MinX);
        columnEnd = column.Max(b => b.MaxX);
        boxStart = box.MinX;
        boxEnd = box.MaxX;
    }

    float shared = Math.Max(0, Math.Min(columnEnd, boxEnd) - Math.Max(columnStart, boxStart));
    float narrower = Math.Min(columnEnd - columnStart, boxEnd - boxStart);

    // Guard against dividing by zero for degenerate (zero-size) spans.
    return narrower > 0 ? shared / narrower : 0f;
}
272+
273+
// Population variance of the samples: the sum of squared deviations from the mean,
// divided by the number of samples. Lists with fewer than two samples yield 0.
static float CalculateVariance(List<float> values)
{
    int sampleCount = values.Count;
    if (sampleCount <= 1)
    {
        return 0f;
    }

    float average = values.Average();
    float squaredDeviationTotal = values.Sum(sample => (sample - average) * (sample - average));

    return squaredDeviationTotal / sampleCount;
}
281+
282+
// Estimates a block's rotation in degrees from the direction of the top edge
// (TopLeft -> TopRight) of the bounding box computed over all of the block's lines.
// The top edge is treated as the primary text direction. The result lies in [-180, 180];
// 0 means the top edge points along the positive X axis.
static float EstimateRotation(RecognizedTextBlock block)
{
    var bbox = RecognizedTextBoundingBoxHelper.Compute(block.Lines.Select(x => x.BoundingBox));

    // Components of the top-edge vector.
    var deltaX = bbox.TopRight.X - bbox.TopLeft.X;
    var deltaY = bbox.TopRight.Y - bbox.TopLeft.Y;

    // Math.Atan2 already returns an angle in [-pi, pi], so the value converted to degrees
    // is within [-180, 180] and needs no further wrapping.
    return (float)(Math.Atan2(deltaY, deltaX) * 180.0 / Math.PI);
}
300+
301+
// Guesses whether a set of boxes is left-, right- or center-aligned by checking which of
// their near edges, far edges or midpoints varies least. Falls back to Left when there are
// fewer than two boxes or when nothing is aligned tightly enough.
static ColumnAlignment DetectAlignment(List<BoxInfo> boxes)
{
    if (boxes.Count <= 1)
    {
        return ColumnAlignment.Left;
    }

    // Bucket boxes by rotation rounded to a multiple of 15 degrees and keep the fullest bucket.
    List<BoxInfo> dominant = boxes
        .GroupBy(b => Math.Round(b.Rotation / 15f) * 15f)
        .OrderByDescending(bucket => bucket.Count())
        .First()
        .ToList();

    float meanRotation = dominant.Average(b => b.Rotation);
    float tilt = Math.Abs(meanRotation);

    // For roughly 90-degree text the Y axis takes over the role of the X axis.
    bool sideways = tilt > 45f && tilt < 135f;

    float NearEdge(BoxInfo b) => sideways ? b.MinY : b.MinX;
    float FarEdge(BoxInfo b) => sideways ? b.MaxY : b.MaxX;

    float nearVariance = CalculateVariance(dominant.Select(b => NearEdge(b)).ToList());
    float farVariance = CalculateVariance(dominant.Select(b => FarEdge(b)).ToList());
    float midVariance = CalculateVariance(dominant.Select(b => (NearEdge(b) + FarEdge(b)) / 2).ToList());

    float smallest = Math.Min(Math.Min(nearVariance, farVariance), midVariance);
    float threshold = 5.0f;

    // Nothing lines up closely enough: use the default.
    if (!(smallest < threshold))
    {
        return ColumnAlignment.Left;
    }

    // The candidates are tested in the order Right, Left, Center, each with a 0.1 tolerance,
    // so Right wins whenever its variance is within 0.1 of the minimum.
    if (Math.Abs(smallest - farVariance) < 0.1f)
    {
        return ColumnAlignment.Right;
    }

    if (Math.Abs(smallest - nearVariance) < 0.1f)
    {
        return ColumnAlignment.Left;
    }

    if (Math.Abs(smallest - midVariance) < 0.1f)
    {
        return ColumnAlignment.Center;
    }

    return ColumnAlignment.Left;
}
346+
347+
// Produces the value by which whole columns are ordered relative to each other.
// The coordinate axis is chosen from the column's average rotation (Y for sideways text,
// X otherwise) and the reference point from the alignment: smallest near edge for Left,
// largest far edge for Right, mean midpoint for Center.
static float GetSortingKey(List<BoxInfo> column, ColumnAlignment alignment)
{
    float tilt = Math.Abs(column.Average(b => b.Rotation));
    bool sideways = tilt > 45f && tilt < 135f;

    switch (alignment)
    {
        case ColumnAlignment.Right:
            return sideways
                ? column.Max(b => b.MaxY)
                : column.Max(b => b.MaxX);

        case ColumnAlignment.Center:
            return sideways
                ? column.Average(b => (b.MinY + b.MaxY) / 2)
                : column.Average(b => (b.MinX + b.MaxX) / 2);

        default:
            // Left, and any other value, sorts by the smallest near edge.
            return sideways
                ? column.Min(b => b.MinY)
                : column.Min(b => b.MinX);
    }
}
376+
377+
// Coordinate a box is ordered by first: MinX when its rotation is strictly between
// 45 and 135 degrees in either direction (sideways text), MinY otherwise.
static float GetPrimarySortKey(BoxInfo box)
{
    float tilt = Math.Abs(box.Rotation);
    bool sideways = tilt > 45f && tilt < 135f;

    return sideways ? box.MinX : box.MinY;
}
386+
387+
// Coordinate a box is ordered by second, measured on the Y axis for sideways text
// (rotation strictly between 45 and 135 degrees in either direction) and on the X axis
// otherwise. Right alignment uses the negated far edge so the order is reversed, Center
// uses the midpoint, and everything else uses the near edge.
static float GetSecondarySortKey(BoxInfo box, ColumnAlignment alignment)
{
    float tilt = Math.Abs(box.Rotation);
    bool sideways = tilt > 45f && tilt < 135f;

    float nearEdge = sideways ? box.MinY : box.MinX;
    float farEdge = sideways ? box.MaxY : box.MaxX;

    switch (alignment)
    {
        case ColumnAlignment.Right:
            return -farEdge;

        case ColumnAlignment.Center:
            return (nearEdge + farEdge) / 2;

        default:
            return nearEdge;
    }
}
250411

251412
var boxInfos = items.Select(obj =>
252413
{
253414
var b = RecognizedTextBoundingBoxHelper.Compute(obj.Lines.Select(x => x.BoundingBox));
254-
255415
float minX = Math.Min(Math.Min(b.TopLeft.X, b.TopRight.X), Math.Min(b.BottomLeft.X, b.BottomRight.X));
256416
float maxX = Math.Max(Math.Max(b.TopLeft.X, b.TopRight.X), Math.Max(b.BottomLeft.X, b.BottomRight.X));
257417
float minY = Math.Min(Math.Min(b.TopLeft.Y, b.TopRight.Y), Math.Min(b.BottomLeft.Y, b.BottomRight.Y));
258418
float maxY = Math.Max(Math.Max(b.TopLeft.Y, b.TopRight.Y), Math.Max(b.BottomLeft.Y, b.BottomRight.Y));
259-
260-
return new BoxInfo(obj, minX, minY, maxX, maxY);
419+
float rotation = EstimateRotation(obj);
420+
return new BoxInfo(obj, minX, minY, maxX, maxY, rotation);
261421
}).ToList();
262422

263-
var rows = new List<List<BoxInfo>>();
264-
foreach (var box in boxInfos.OrderBy(b => b.MinY))
423+
// Group into rows/columns based on rotation
424+
var groups = new List<List<BoxInfo>>();
425+
426+
foreach (var box in boxInfos.OrderBy(GetPrimarySortKey))
265427
{
266-
var row = rows.FirstOrDefault(r => r.Any(rBox => VerticalOverlap(rBox, box) > 0.5f));
267-
if (row == null)
428+
var group = groups.FirstOrDefault(g => g.Any(gBox => VerticalOverlap(gBox, box) > 0.5f));
429+
if (group == null)
268430
{
269-
row = new List<BoxInfo>();
270-
rows.Add(row);
431+
group = new List<BoxInfo>();
432+
groups.Add(group);
271433
}
272-
row.Add(box);
434+
group.Add(box);
273435
}
274436

275437
var orderedBlocks = new List<RecognizedTextBlock>();
276438

277-
foreach (var row in rows.OrderBy(r => r.Min(b => b.MinY)))
439+
foreach (var group in groups.OrderBy(g => g.Min(GetPrimarySortKey)))
278440
{
279-
var columns = new List<List<BoxInfo>>();
441+
var groupAlignment = DetectAlignment(group);
442+
443+
var subColumns = new List<List<BoxInfo>>();
444+
var sortedGroupBoxes = group.OrderBy(b => GetSecondarySortKey(b, groupAlignment));
280445

281-
foreach (var box in row.OrderBy(b => b.MinX))
446+
foreach (var box in sortedGroupBoxes)
282447
{
283-
var col = columns.FirstOrDefault(c => HorizontalOverlap(c, box) > 0.5f);
448+
var col = subColumns.FirstOrDefault(c => HorizontalOverlap(c, box) > 0.5f);
284449
if (col == null)
285450
{
286451
col = new List<BoxInfo>();
287-
columns.Add(col);
452+
subColumns.Add(col);
288453
}
289454
col.Add(box);
290455
}
291456

292-
foreach (var col in columns.OrderBy(c => c.Min(b => b.MinX)))
457+
var sortedColumns = subColumns.OrderBy(c => GetSortingKey(c, groupAlignment));
458+
459+
foreach (var col in sortedColumns)
293460
{
294-
orderedBlocks.AddRange(col.OrderBy(b => b.MinY).Select(b => b.Item));
461+
var sortedColBoxes = col.OrderBy(GetPrimarySortKey)
462+
.ThenBy(b => GetSecondarySortKey(b, groupAlignment));
463+
orderedBlocks.AddRange(sortedColBoxes.Select(b => b.Item));
295464
}
296465
}
297466

298467
return orderedBlocks;
299468
}
300469

301-
private readonly struct BoxInfo
470+
/// <summary>
/// Read-only pairing of a recognized text block with the bounds and rotation
/// used when ordering blocks.
/// </summary>
public class BoxInfo
{
    /// <summary>
    /// Stores the block together with its bounds and rotation.
    /// </summary>
    /// <param name="item">The recognized text block these bounds belong to.</param>
    /// <param name="minX">Smallest X coordinate of the block's bounds.</param>
    /// <param name="minY">Smallest Y coordinate of the block's bounds.</param>
    /// <param name="maxX">Largest X coordinate of the block's bounds.</param>
    /// <param name="maxY">Largest Y coordinate of the block's bounds.</param>
    /// <param name="rotation">Rotation of the block; defaults to 0 when omitted.</param>
    public BoxInfo(RecognizedTextBlock item, float minX, float minY, float maxX, float maxY, float rotation = 0f)
        => (Item, MinX, MinY, MaxX, MaxY, Rotation) = (item, minX, minY, maxX, maxY, rotation);

    /// <summary>The recognized text block.</summary>
    public RecognizedTextBlock Item { get; }

    /// <summary>Smallest X coordinate of the bounds.</summary>
    public float MinX { get; }

    /// <summary>Smallest Y coordinate of the bounds.</summary>
    public float MinY { get; }

    /// <summary>Largest X coordinate of the bounds.</summary>
    public float MaxX { get; }

    /// <summary>Largest Y coordinate of the bounds.</summary>
    public float MaxY { get; }

    /// <summary>Rotation of the block; the ordering code compares it against 45 and 135.</summary>
    public float Rotation { get; }
}
318489

0 commit comments

Comments
 (0)