@@ -233,86 +233,257 @@ private static List<RecognizedTextBlock> SortReadingOrder(IEnumerable<Recognized
233
233
{
234
234
// Fraction by which two boxes overlap along the axis used to decide whether they belong
// to the same line of text: the Y extents for upright text, the X extents when either box
// is turned sideways. The overlap length is divided by the smaller of the two extents on
// that axis; a non-positive extent yields 0 instead of dividing by zero.
static float VerticalOverlap(BoxInfo a, BoxInfo b)
{
    // "Sideways" is the same 45°..135° window GetPrimarySortKey uses. Without the upper
    // bound, text rotated by roughly 180° would be grouped by X overlap here while being
    // ordered by MinY there, although its axis-aligned extents behave like upright text.
    float tiltA = Math.Abs(a.Rotation);
    float tiltB = Math.Abs(b.Rotation);
    bool sideways = (tiltA > 45f && tiltA < 135f) || (tiltB > 45f && tiltB < 135f);

    float aStart, aEnd, bStart, bEnd;
    if (sideways)
    {
        aStart = a.MinX;
        aEnd = a.MaxX;
        bStart = b.MinX;
        bEnd = b.MaxX;
    }
    else
    {
        aStart = a.MinY;
        aEnd = a.MaxY;
        bStart = b.MinY;
        bEnd = b.MaxY;
    }

    float overlap = Math.Max(0, Math.Min(aEnd, bEnd) - Math.Max(aStart, bStart));
    float smallerExtent = Math.Min(aEnd - aStart, bEnd - bStart);
    return smallerExtent > 0 ? overlap / smallerExtent : 0f;
}
240
249
241
250
// Fraction by which a box overlaps the combined extent of an existing column along the
// column axis: X for upright text, Y when the box or any column member is turned
// sideways. The overlap length is divided by the smaller of the column's extent and the
// box's extent; a non-positive extent yields 0. The column is expected to be non-empty,
// since Min/Max throw on an empty sequence.
static float HorizontalOverlap(List<BoxInfo> column, BoxInfo box)
{
    // Use the same 45°..135° window as GetPrimarySortKey/GetSecondarySortKey so that text
    // rotated by roughly 180° is not measured on the swapped axis.
    float boxTilt = Math.Abs(box.Rotation);
    bool sideways = (boxTilt > 45f && boxTilt < 135f)
        || column.Any(b => Math.Abs(b.Rotation) > 45f && Math.Abs(b.Rotation) < 135f);

    float columnStart, columnEnd, boxStart, boxEnd;
    if (sideways)
    {
        columnStart = column.Min(b => b.MinY);
        columnEnd = column.Max(b => b.MaxY);
        boxStart = box.MinY;
        boxEnd = box.MaxY;
    }
    else
    {
        columnStart = column.Min(b => b.MinX);
        columnEnd = column.Max(b => b.MaxX);
        boxStart = box.MinX;
        boxEnd = box.MaxX;
    }

    float overlap = Math.Max(0, Math.Min(columnEnd, boxEnd) - Math.Max(columnStart, boxStart));
    float smallerExtent = Math.Min(columnEnd - columnStart, boxEnd - boxStart);
    return smallerExtent > 0 ? overlap / smallerExtent : 0f;
}
272
+
273
// Population variance of the samples: the sum of squared deviations from the mean,
// divided by the number of samples. Lists with fewer than two entries have no spread
// and yield 0.
static float CalculateVariance(List<float> values)
{
    int sampleCount = values.Count;
    if (sampleCount < 2)
    {
        return 0f;
    }

    float center = values.Average();
    float squaredDeviationTotal = values.Sum(sample => (sample - center) * (sample - center));

    return squaredDeviationTotal / sampleCount;
}
281
+
282
// Estimates a block's rotation in degrees as the direction of the top edge
// (TopLeft -> TopRight) of the bounding box computed over all of the block's lines.
// Only that single edge is used; individual line orientations are not inspected.
static float EstimateRotation(RecognizedTextBlock block)
{
    var bbox = RecognizedTextBoundingBoxHelper.Compute(block.Lines.Select(x => x.BoundingBox));

    var deltaX = bbox.TopRight.X - bbox.TopLeft.X;
    var deltaY = bbox.TopRight.Y - bbox.TopLeft.Y;

    // Math.Atan2 returns radians within [-PI, PI], so the converted value already lies
    // within [-180, 180] and needs no further wrapping.
    return (float)(Math.Atan2(deltaY, deltaX) * 180.0 / Math.PI);
}
300
+
301
// Guesses how a set of boxes is aligned by comparing the spread (variance) of their
// leading edges, trailing edges and centers. The alignment whose coordinate varies least
// wins, provided that variance is below a fixed threshold; otherwise, and for fewer than
// two boxes, Left is returned.
static ColumnAlignment DetectAlignment(List<BoxInfo> boxes)
{
    if (boxes.Count <= 1)
    {
        return ColumnAlignment.Left;
    }

    // Bucket boxes by rotation rounded to the nearest multiple of 15 degrees and analyse
    // only the most populated bucket (the first such bucket when counts are equal).
    var dominantGroup = boxes
        .GroupBy(b => Math.Round(b.Rotation / 15f) * 15f)
        .OrderByDescending(g => g.Count())
        .First()
        .ToList();

    float meanTilt = Math.Abs(dominantGroup.Average(b => b.Rotation));
    bool sideways = meanTilt > 45f && meanTilt < 135f;

    // For text turned by roughly 90 degrees the Y extents play the role of the X extents.
    List<float> primaryEdges = dominantGroup.Select(b => sideways ? b.MinY : b.MinX).ToList();
    List<float> secondaryEdges = dominantGroup.Select(b => sideways ? b.MaxY : b.MaxX).ToList();
    List<float> centers = dominantGroup
        .Select(b => sideways ? (b.MinY + b.MaxY) / 2 : (b.MinX + b.MaxX) / 2)
        .ToList();

    float primaryVariance = CalculateVariance(primaryEdges);
    float secondaryVariance = CalculateVariance(secondaryEdges);
    float centerVariance = CalculateVariance(centers);

    float minVariance = Math.Min(Math.Min(primaryVariance, secondaryVariance), centerVariance);
    const float threshold = 5.0f;

    if (minVariance < threshold)
    {
        // Pick the genuinely smallest variance. Ties go to Left, the default alignment:
        // equal-width boxes produce three identical variances, and resolving that tie to
        // Right would make GetSecondarySortKey order the group in reverse.
        if (primaryVariance <= secondaryVariance && primaryVariance <= centerVariance)
        {
            return ColumnAlignment.Left;
        }

        return secondaryVariance <= centerVariance
            ? ColumnAlignment.Right
            : ColumnAlignment.Center;
    }

    return ColumnAlignment.Left;
}
346
+
347
// Produces the coordinate used to order sub-columns within a group. The column's average
// rotation decides the axis (Y when it lies between 45 and 135 degrees in magnitude,
// X otherwise); the alignment decides whether the smallest leading edge, the largest
// trailing edge, or the average center along that axis is returned.
static float GetSortingKey(List<BoxInfo> column, ColumnAlignment alignment)
{
    float meanTilt = Math.Abs(column.Average(b => b.Rotation));
    bool sideways = meanTilt > 45f && meanTilt < 135f;

    Func<BoxInfo, float> leadingEdge;
    Func<BoxInfo, float> trailingEdge;
    if (sideways)
    {
        leadingEdge = b => b.MinY;
        trailingEdge = b => b.MaxY;
    }
    else
    {
        leadingEdge = b => b.MinX;
        trailingEdge = b => b.MaxX;
    }

    switch (alignment)
    {
        case ColumnAlignment.Right:
            return column.Max(trailingEdge);
        case ColumnAlignment.Center:
            return column.Average(b => (leadingEdge(b) + trailingEdge(b)) / 2);
        default:
            // Left and any unrecognised alignment share the leading-edge key.
            return column.Min(leadingEdge);
    }
}
376
+
377
// Primary ordering coordinate for a box: MinX when its rotation magnitude lies strictly
// between 45 and 135 degrees, MinY otherwise.
static float GetPrimarySortKey(BoxInfo box)
{
    float tilt = Math.Abs(box.Rotation);
    bool sideways = tilt > 45f && tilt < 135f;
    return sideways ? box.MinX : box.MinY;
}
386
+
387
// Secondary ordering coordinate for a box, taken along Y when the box's rotation
// magnitude lies strictly between 45 and 135 degrees and along X otherwise. Right
// alignment returns the negated trailing edge (so larger edges sort first), Center
// returns the midpoint, and every other alignment returns the leading edge.
static float GetSecondarySortKey(BoxInfo box, ColumnAlignment alignment)
{
    float tilt = Math.Abs(box.Rotation);
    bool sideways = tilt > 45f && tilt < 135f;

    float leadingEdge = sideways ? box.MinY : box.MinX;
    float trailingEdge = sideways ? box.MaxY : box.MaxX;

    switch (alignment)
    {
        case ColumnAlignment.Right:
            return -trailingEdge;
        case ColumnAlignment.Center:
            return (leadingEdge + trailingEdge) / 2;
        default:
            return leadingEdge;
    }
}
250
411
251
412
var boxInfos = items . Select ( obj =>
252
413
{
253
414
var b = RecognizedTextBoundingBoxHelper . Compute ( obj . Lines . Select ( x => x . BoundingBox ) ) ;
254
-
255
415
float minX = Math . Min ( Math . Min ( b . TopLeft . X , b . TopRight . X ) , Math . Min ( b . BottomLeft . X , b . BottomRight . X ) ) ;
256
416
float maxX = Math . Max ( Math . Max ( b . TopLeft . X , b . TopRight . X ) , Math . Max ( b . BottomLeft . X , b . BottomRight . X ) ) ;
257
417
float minY = Math . Min ( Math . Min ( b . TopLeft . Y , b . TopRight . Y ) , Math . Min ( b . BottomLeft . Y , b . BottomRight . Y ) ) ;
258
418
float maxY = Math . Max ( Math . Max ( b . TopLeft . Y , b . TopRight . Y ) , Math . Max ( b . BottomLeft . Y , b . BottomRight . Y ) ) ;
259
-
260
- return new BoxInfo ( obj , minX , minY , maxX , maxY ) ;
419
+ float rotation = EstimateRotation ( obj ) ;
420
+ return new BoxInfo ( obj , minX , minY , maxX , maxY , rotation ) ;
261
421
} ) . ToList ( ) ;
262
422
263
- var rows = new List < List < BoxInfo > > ( ) ;
264
- foreach ( var box in boxInfos . OrderBy ( b => b . MinY ) )
423
+ // Group into rows/columns based on rotation
424
+ var groups = new List < List < BoxInfo > > ( ) ;
425
+
426
+ foreach ( var box in boxInfos . OrderBy ( GetPrimarySortKey ) )
265
427
{
266
- var row = rows . FirstOrDefault ( r => r . Any ( rBox => VerticalOverlap ( rBox , box ) > 0.5f ) ) ;
267
- if ( row == null )
428
+ var group = groups . FirstOrDefault ( g => g . Any ( gBox => VerticalOverlap ( gBox , box ) > 0.5f ) ) ;
429
+ if ( group == null )
268
430
{
269
- row = new List < BoxInfo > ( ) ;
270
- rows . Add ( row ) ;
431
+ group = new List < BoxInfo > ( ) ;
432
+ groups . Add ( group ) ;
271
433
}
272
- row . Add ( box ) ;
434
+ group . Add ( box ) ;
273
435
}
274
436
275
437
var orderedBlocks = new List < RecognizedTextBlock > ( ) ;
276
438
277
- foreach ( var row in rows . OrderBy ( r => r . Min ( b => b . MinY ) ) )
439
+ foreach ( var group in groups . OrderBy ( g => g . Min ( GetPrimarySortKey ) ) )
278
440
{
279
- var columns = new List < List < BoxInfo > > ( ) ;
441
+ var groupAlignment = DetectAlignment ( group ) ;
442
+
443
+ var subColumns = new List < List < BoxInfo > > ( ) ;
444
+ var sortedGroupBoxes = group . OrderBy ( b => GetSecondarySortKey ( b , groupAlignment ) ) ;
280
445
281
- foreach ( var box in row . OrderBy ( b => b . MinX ) )
446
+ foreach ( var box in sortedGroupBoxes )
282
447
{
283
- var col = columns . FirstOrDefault ( c => HorizontalOverlap ( c , box ) > 0.5f ) ;
448
+ var col = subColumns . FirstOrDefault ( c => HorizontalOverlap ( c , box ) > 0.5f ) ;
284
449
if ( col == null )
285
450
{
286
451
col = new List < BoxInfo > ( ) ;
287
- columns . Add ( col ) ;
452
+ subColumns . Add ( col ) ;
288
453
}
289
454
col . Add ( box ) ;
290
455
}
291
456
292
- foreach ( var col in columns . OrderBy ( c => c . Min ( b => b . MinX ) ) )
457
+ var sortedColumns = subColumns . OrderBy ( c => GetSortingKey ( c , groupAlignment ) ) ;
458
+
459
+ foreach ( var col in sortedColumns )
293
460
{
294
- orderedBlocks . AddRange ( col . OrderBy ( b => b . MinY ) . Select ( b => b . Item ) ) ;
461
+ var sortedColBoxes = col . OrderBy ( GetPrimarySortKey )
462
+ . ThenBy ( b => GetSecondarySortKey ( b , groupAlignment ) ) ;
463
+ orderedBlocks . AddRange ( sortedColBoxes . Select ( b => b . Item ) ) ;
295
464
}
296
465
}
297
466
298
467
return orderedBlocks ;
299
468
}
300
469
301
/// <summary>
/// Immutable pairing of a recognized text block with its axis-aligned extents and an
/// estimated rotation, used while sorting blocks into reading order.
/// </summary>
public class BoxInfo
{
    /// <summary>Creates a box description for <paramref name="item"/>.</summary>
    /// <param name="item">The text block the extents belong to.</param>
    /// <param name="minX">Smallest X coordinate of the block's bounding box.</param>
    /// <param name="minY">Smallest Y coordinate of the block's bounding box.</param>
    /// <param name="maxX">Largest X coordinate of the block's bounding box.</param>
    /// <param name="maxY">Largest Y coordinate of the block's bounding box.</param>
    /// <param name="rotation">Estimated rotation in degrees; defaults to 0.</param>
    public BoxInfo(RecognizedTextBlock item, float minX, float minY, float maxX, float maxY, float rotation = 0f)
    {
        Item = item;
        MinX = minX;
        MinY = minY;
        MaxX = maxX;
        MaxY = maxY;
        Rotation = rotation;
    }

    /// <summary>The text block this box describes.</summary>
    public RecognizedTextBlock Item { get; }

    /// <summary>Smallest X coordinate of the bounding box.</summary>
    public float MinX { get; }

    /// <summary>Smallest Y coordinate of the bounding box.</summary>
    public float MinY { get; }

    /// <summary>Largest X coordinate of the bounding box.</summary>
    public float MaxX { get; }

    /// <summary>Largest Y coordinate of the bounding box.</summary>
    public float MaxY { get; }

    /// <summary>Estimated rotation of the block, in degrees.</summary>
    public float Rotation { get; }
}
318
489
0 commit comments