-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPascalType.Shaper.Script.Arabic.pas
More file actions
1010 lines (846 loc) · 40.2 KB
/
PascalType.Shaper.Script.Arabic.pas
File metadata and controls
1010 lines (846 loc) · 40.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
unit PascalType.Shaper.Script.Arabic;
////////////////////////////////////////////////////////////////////////////////
// //
// Shaper for Arabic, and other cursive scripts. //
// //
// Originally based on the FontKit Arabic shaper (which in turn was //
// based on an old version of the Harfbuzz Arabic shaper). //
// Later updated to align with the current OT specs. //
// //
////////////////////////////////////////////////////////////////////////////////
// //
// Version: MPL 1.1 or LGPL 2.1 with linking exception //
// //
// The contents of this file are subject to the Mozilla Public License //
// Version 1.1 (the "License"); you may not use this file except in //
// compliance with the License. You may obtain a copy of the License at //
// http://www.mozilla.org/MPL/ //
// //
// Software distributed under the License is distributed on an "AS IS" //
// basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the //
// License for the specific language governing rights and limitations under //
// the License. //
// //
// Alternatively, the contents of this file may be used under the terms of //
// the Free Pascal modified version of the GNU Lesser General Public //
// License Version 2.1 (the "FPC modified LGPL License"), in which case the //
// provisions of this license are applicable instead of those above. //
// Please see the file LICENSE.txt for additional information concerning //
// this license. //
// //
// The code is part of the PascalType Project //
// //
// The initial developer of this code is Anders Melander. //
// //
// Portions created by Anders Melander are Copyright (C) 2023 //
// by Anders Melander. All Rights Reserved. //
// //
////////////////////////////////////////////////////////////////////////////////
interface
{$I PT_Compiler.inc}
uses
PascalType.Types,
PascalType.Unicode,
PascalType.GlyphString,
PascalType.Shaper,
PascalType.Shaper.Script.Default,
PascalType.Shaper.Plan,
PascalType.Shaper.Layout;
//------------------------------------------------------------------------------
//
// TPascalTypeArabicShaper
//
//------------------------------------------------------------------------------
type
// Shaper for scripts that follow the Arabic (cursive joining) shaping model.
// It plans the GSUB/GPOS feature stages (see SetupFeatures, PlanPostprocessing),
// assigns the per-glyph joining-form features through a state machine
// (see AssignLocalFeatures) and implements the 'stch' stretching feature
// through two delegates (RecordStretchingDelegate, PositionStretchingDelegate).
TPascalTypeArabicShaper = class(TCustomPascalTypeShaper)
private type
// NOTE(review): TStretchElement is not referenced in the part of the unit
// reviewed here - confirm that it is still needed.
TStretchElement = (StretchElementStart, StretchElementExt1, StretchElementMid, StretchElementExt2, StretchElementEnd, StretchElementWord);
private
// Set to True by RecordStretchingDelegate once an 'stch' sequence has been
// tagged. PositionStretchingDelegate exits immediately while it is False.
FHasStretching: boolean;
// Maps a code point to its joining class; the result indexes the joining
// state machine and is compared against scTransparent/scALAPH/etc.
function GetShapingClass(ACodePoint: TPascalTypeCodePoint): ArabicShapingClasses.TShapingClass;
// Used by PositionStretchingDelegate to decide whether a glyph following a
// stretch sequence still belongs to the word being measured.
class function IsWord(ACodePoint: TPascalTypeCodePoint): boolean; static;
protected
// Actually private, but unit test needs access to them
// Delegate installed by PlanPreprocessing (mark reordering).
function ReorderMarksDelegate(AProcessor: TObject; var AGlyphs: TPascalTypeGlyphString): TTableNames;
// Post-delegate of the 'stch' substitution stage (see SetupFeatures).
function RecordStretchingDelegate(AProcessor: TObject; var AGlyphs: TPascalTypeGlyphString): TTableNames;
// Delegate of the extra positioning stage added by SetupPostprocessing.
function PositionStretchingDelegate(AProcessor: TObject; var AGlyphs: TPascalTypeGlyphString): TTableNames;
protected
function NeedUnicodeComposition: boolean; override;
// Builds the substitution stages: 'stch', the joining forms and the
// language/typographic substitution features, in shaper-defined order.
procedure SetupFeatures(APlan: TPascalTypeShapingPlan; const AFeatures: TPascalTypeFeatures); override;
// Appends the stage that positions the 'stch' stretching components.
procedure SetupPostprocessing(APlan: TPascalTypeShapingPlan; const AFeatures: TPascalTypeFeatures); override;
// Installs ReorderMarksDelegate on the preprocessing stage.
procedure PlanPreprocessing(AStage: TPascalTypeShapingPlanStage); override;
// Adds the positioning features 'curs', 'kern', 'mark', 'mkmk'.
procedure PlanPostprocessing(AStage: TPascalTypeShapingPlanStage); override;
// Runs the joining state machine and adds the chosen joining feature to
// each glyph's PlanFeatures.
procedure AssignLocalFeatures(const AFeatures: TPascalTypeFeatures; var AGlyphs: TPascalTypeGlyphString); override;
end;
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
implementation
uses
System.Classes,
System.Math,
PascalType.Classes,
PascalType.Shaper.Layout.OpenType,
PascalType.Shaper.OpenType.Processor,
PascalType.Tables.OpenType.Lookup,
PascalType.Tables.OpenType.Substitution.Multiple,
PascalType.Tables.OpenType.Script,
PascalType.Tables.OpenType.LanguageSystem,
PascalType.Tables.OpenType.Feature;
type
// State index of the joining state machine; StateMachine defines states 0..6.
TState = 0..6;
// One transition of the joining state machine.
TStateEntry = record
// Feature tag to assign to the previous (non-transparent) glyph, or None.
PreviousAction: TTableName;
// Feature tag to assign to the current glyph, or None.
CurrentAction: TTableName;
// State to continue in after this transition.
NextState: TState;
end;
// One row of the machine: a transition for each joining class from
// scNon_Joining through scDALATH_RISH.
TStateEntries = array[ArabicShapingClasses.TShapingClass.scNon_Joining..ArabicShapingClasses.TShapingClass.scDALATH_RISH] of TStateEntry;
// The complete machine, indexed as [State, ShapingClass].
TStateMachine = array[TState] of TStateEntries;
const
// The shaping state machine was ported from Harfbuzz via FontKit.
// https://github.com/harfbuzz/harfbuzz/blob/main/src/hb-ot-shaper-arabic.cc
// All-zero tag meaning "no action"; AssignLocalFeatures tests actions against 0.
None = #0#0#0#0;
// Transition table indexed as StateMachine[State, ShapingClass].
// Each entry gives the feature for the previous glyph, the feature for the
// current glyph, and the next state. Transparent glyphs never index this
// table; AssignLocalFeatures skips them before the lookup.
StateMachine: TStateMachine = (
// Non_Joining, Left_Joining, Right_Joining, Dual_Joining, ALAPH, DALATH RISH
// State 0: prev was U, not willing to join.
((PreviousAction: None; CurrentAction: None; NextState: 0), (PreviousAction: None; CurrentAction: 'isol'; NextState: 2), (PreviousAction: None; CurrentAction: 'isol'; NextState: 1), (PreviousAction: None; CurrentAction: 'isol'; NextState: 2), (PreviousAction: None; CurrentAction: 'isol'; NextState: 1), (PreviousAction: None; CurrentAction: 'isol'; NextState: 6)),
// State 1: prev was R or 'isol'/ALAPH, not willing to join.
((PreviousAction: None; CurrentAction: None; NextState: 0), (PreviousAction: None; CurrentAction: 'isol'; NextState: 2), (PreviousAction: None; CurrentAction: 'isol'; NextState: 1), (PreviousAction: None; CurrentAction: 'isol'; NextState: 2), (PreviousAction: None; CurrentAction: 'fin2'; NextState: 5), (PreviousAction: None; CurrentAction: 'isol'; NextState: 6)),
// State 2: prev was D/L in 'isol' form, willing to join.
((PreviousAction: None; CurrentAction: None; NextState: 0), (PreviousAction: None; CurrentAction: 'isol'; NextState: 2), (PreviousAction: 'init'; CurrentAction: 'fina'; NextState: 1), (PreviousAction: 'init'; CurrentAction: 'fina'; NextState: 3), (PreviousAction: 'init'; CurrentAction: 'fina'; NextState: 4), (PreviousAction: 'init'; CurrentAction: 'fina'; NextState: 6)),
// State 3: prev was D in 'fina' form, willing to join.
((PreviousAction: None; CurrentAction: None; NextState: 0), (PreviousAction: None; CurrentAction: 'isol'; NextState: 2), (PreviousAction: 'medi'; CurrentAction: 'fina'; NextState: 1), (PreviousAction: 'medi'; CurrentAction: 'fina'; NextState: 3), (PreviousAction: 'medi'; CurrentAction: 'fina'; NextState: 4), (PreviousAction: 'medi'; CurrentAction: 'fina'; NextState: 6)),
// State 4: prev was 'fina' ALAPH, not willing to join.
((PreviousAction: None; CurrentAction: None; NextState: 0), (PreviousAction: None; CurrentAction: 'isol'; NextState: 2), (PreviousAction: 'med2'; CurrentAction: 'isol'; NextState: 1), (PreviousAction: 'med2'; CurrentAction: 'isol'; NextState: 2), (PreviousAction: 'med2'; CurrentAction: 'fin2'; NextState: 5), (PreviousAction: 'med2'; CurrentAction: 'isol'; NextState: 6)),
// State 5: prev was 'fin2'/'fin3' ALAPH, not willing to join.
((PreviousAction: None; CurrentAction: None; NextState: 0), (PreviousAction: None; CurrentAction: 'isol'; NextState: 2), (PreviousAction: 'isol'; CurrentAction: 'isol'; NextState: 1), (PreviousAction: 'isol'; CurrentAction: 'isol'; NextState: 2), (PreviousAction: 'isol'; CurrentAction: 'fin2'; NextState: 5), (PreviousAction: 'isol'; CurrentAction: 'isol'; NextState: 6)),
// State 6: prev was DALATH/RISH, not willing to join.
((PreviousAction: None; CurrentAction: None; NextState: 0), (PreviousAction: None; CurrentAction: 'isol'; NextState: 2), (PreviousAction: None; CurrentAction: 'isol'; NextState: 1), (PreviousAction: None; CurrentAction: 'isol'; NextState: 2), (PreviousAction: None; CurrentAction: 'fin3'; NextState: 5), (PreviousAction: None; CurrentAction: 'isol'; NextState: 6))
);
//------------------------------------------------------------------------------
//
// TPascalTypeArabicShaper
//
//------------------------------------------------------------------------------
procedure TPascalTypeArabicShaper.PlanPreprocessing(AStage: TPascalTypeShapingPlanStage);
begin
  // The ancestor plans its part of the preprocessing stage first.
  inherited PlanPreprocessing(AStage);

  (*
  ** Stage 1: Transient reordering of modifier combining marks
  **
  ** Runs of adjacent marks have to be brought into their visual order before
  ** the GPOS mark-to-base and mark-to-mark features can position them:
  **
  **   "Sequences of adjacent marks must be reordered so that they appear in the
  **   appropriate visual order before the mark-to-base and mark-to-mark
  **   positioning features from GPOS can be correctly applied."
  **
  ** Reference: https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-arabic-general.md#stage-1-transient-reordering-of-modifier-combining-marks
  *)

  // The reordering itself is done by the delegate when the stage executes.
  AStage.Delegate := Self.ReorderMarksDelegate;
end;
procedure TPascalTypeArabicShaper.SetupFeatures(APlan: TPascalTypeShapingPlan; const AFeatures: TPascalTypeFeatures);

  // Appends a new stage to the plan that contains only the feature ATag.
  // One stage per feature is what enforces the shaper-defined feature order.
  procedure AddSingleFeatureStage(const ATag: TPascalTypeTag; AGlobal: boolean = True);
  begin
    APlan.Stages.Add.Add(ATag, AGlobal);
  end;

begin
  // fin2, fin3 and med2 are only planned for the Syriac script.
  var IsSyriac := (Script = 'syrc');

  (*
  ** Stage 4: Applying the stch feature
  **
  ** Reference: https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-syriac.md#stage-4-applying-the-stch-feature
  **   "The stch feature decomposes and stretches special marks that are meant
  **   to extend to the full width of words to which they are attached.
  **   ...
  **   To apply the stch feature, the shaping engine should first
  **   decompose the U+070F glyph into components, which results in a
  **   beginning point, midpoint, and endpoint glyphs plus one (or more)
  **   extension glyphs: at least one extension between the beginning
  **   and midpoint glyphs and at least one extension between the
  **   midpoint and endpoint glyphs."
  **
  ** Reference: https://learn.microsoft.com/en-us/typography/opentype/spec/features_pt#tag-stch
  **   "This feature defines a decomposition set consisting of an odd
  **   number of glyphs that can be used to dynamically generate the
  **   stretching glyph. The odd numbered glyphs in the decomposition
  **   are fixed reference points that are distributed evenly from
  **   the start to the end of the enclosed text. The even numbered
  **   glyphs may be repeated as necessary in the text presentation
  **   to fill the space between the fixed glyphs. The first and last
  **   glyphs may either be simple glyphs with width at the baseline,
  **   or mark glyphs. All other decomposition glyphs should have
  **   width but must be defined as mark glyphs."
  **
  ** The glyph sequence produced by the GSUB 'stch' decomposition must have an
  ** odd length of at least three: [start, mid, end].
  *)
  var StretchStage := APlan.Stages.Add;
  StretchStage.Add('stch');
  StretchStage.Scope := [ShapingPhaseSubstitution];
  // After the substitution has run, tag the glyphs it produced so that the
  // positioning phase can find them again.
  StretchStage.PostDelegate := RecordStretchingDelegate;

  (*
  ** Stage 5: Applying the language-form substitution features from GSUB
  **
  **   "The language-substitution phase applies mandatory substitution features
  **   using the rules in the font's GSUB table. ... The order in which these
  **   substitutions must be performed is fixed for all scripts implemented
  **   with the Arabic shaping model: locl, isol, fina, fin2, fin3, medi, med2,
  **   init, rlig, rclt, calt"
  **
  ** Reference: https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-arabic-general.md#stage-5-applying-the-language-form-substitution-features-from-gsub
  **
  ** Every feature gets a stage of its own so the order above is honoured.
  *)
  AddSingleFeatureStage('locl');

  // Mandatory joining forms. Not global: the state machine in
  // AssignLocalFeatures decides which glyph gets which form.
  AddSingleFeatureStage('isol', False);   // Isolated Forms
  AddSingleFeatureStage('fina', False);   // Terminal Forms
  if (IsSyriac) then
  begin
    AddSingleFeatureStage('fin2', False); // Terminal Forms #2
    AddSingleFeatureStage('fin3', False); // Terminal Forms #3
  end;
  AddSingleFeatureStage('medi', False);   // Medial Forms
  if (IsSyriac) then
    AddSingleFeatureStage('med2', False); // Medial Forms #2
  AddSingleFeatureStage('init', False);   // Initial Forms

  // Mandatory features that apply globally to the whole run.
  AddSingleFeatureStage('rlig');          // Required Ligatures
  AddSingleFeatureStage('rclt');          // Required Contextual Alternates
  AddSingleFeatureStage('calt');          // Contextual Alternates

  (*
  ** Stage 6: Applying the typographic-form substitution features from GSUB
  **
  **   "The typographic-substitution phase applies optional substitution features
  **   using the rules in the font's GSUB table. ... The order in which these
  **   substitution must be performed is fixed for all scripts implemented in
  **   the Arabic shaping model: liga, dlig, cswh, clig, mset"
  **
  ** Reference: https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-arabic-general.md#stage-6-applying-the-typographic-form-substitution-features-from-gsub
  *)
  AddSingleFeatureStage('liga');          // Standard Ligatures, optional
  AddSingleFeatureStage('dlig');          // Discretionary Ligatures, optional
  AddSingleFeatureStage('cswh');          // Contextual Swash, optional
  AddSingleFeatureStage('clig');          // Contextual Ligatures
  AddSingleFeatureStage('mset');          // Mark Positioning via Substitution (legacy; Windows 95...)
end;
procedure TPascalTypeArabicShaper.PlanPostprocessing(AStage: TPascalTypeShapingPlanStage);
begin
  (*
  ** Stage 7: Applying the positioning features from GPOS
  **
  ** All four positioning features share one stage, listed in the order
  ** prescribed for the Arabic shaping model:
  **
  **   "The positioning stage adjusts the positions of mark and base glyphs.
  **   The order in which these features are applied is fixed for all scripts
  **   implemented in the Arabic shaping model: curs, kern, mark, mkmk"
  **
  ** Reference: https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-arabic-general.md#stage-7-applying-the-positioning-features-from-gpos
  *)
  AStage.Add([
    'curs',
    'kern',
    'mark',
    'mkmk'
  ]);
end;
procedure TPascalTypeArabicShaper.SetupPostprocessing(APlan: TPascalTypeShapingPlan; const AFeatures: TPascalTypeFeatures);
begin
  // The ancestor sets up its own postprocessing first.
  inherited;

  // Append one more positioning-phase stage. It carries no features; its
  // delegate places the 'stch' stretching components by hand. Because it is
  // added after the inherited stages it runs after the other GPOS features.
  var StretchPositioningStage := APlan.Stages.Add;
  StretchPositioningStage.Scope := [ShapingPhasePositioning];
  StretchPositioningStage.Delegate := PositionStretchingDelegate;
end;
// Positioning-phase delegate that lays out the glyphs of each 'stch' stretch
// sequence (tagged earlier by RecordStretchingDelegate) across the width of
// the word that follows the sequence in the glyph string.
//
// For every valid sequence (odd length, at least 3 glyphs) it:
//   1. measures the following word by summing XAdvance of the glyphs after the
//      sequence until a stretched glyph or a non-word code point is reached,
//   2. distributes the fixed glyphs (even positions) evenly over that width,
//   3. replicates the extension glyphs (odd LigatureComponent) by inserting
//      copies into AGlyphs until the gaps between fixed glyphs are filled.
// All glyphs of the sequence end up with XAdvance = 0 and are placed through
// XOffset only.
//
// AProcessor: expected to be a TCustomPascalTypeOpenTypeProcessor in the
//             positioning phase (asserted).
// AGlyphs:    the glyph string; modified in place and may grow.
// Returns:    always nil.
function TPascalTypeArabicShaper.PositionStretchingDelegate(AProcessor: TObject; var AGlyphs: TPascalTypeGlyphString): TTableNames;
begin
// Harfbuzz reference:
//
// https://github.com/harfbuzz/harfbuzz/blob/be79d5426553db9ce4074507c3a7c9afc175078d/src/hb-ot-shaper-arabic.cc#L479
//
// Note that we use a different method than Harfbuzz - and we support both
// RTL and LTR, and we do not assume that all extension glyphs are the same.
Result := nil;
ASSERT(TCustomPascalTypeOpenTypeProcessor(AProcessor).Phase = ShapingPhasePositioning);
// Nothing was tagged by RecordStretchingDelegate; skip the scan entirely.
if (not FHasStretching) then
exit;
var i := 0;
while (i < AGlyphs.Count) do
begin
var Glyph := AGlyphs[i];
// Find the start of a stretching sequence
if (not Glyph.IsStretched) then
begin
Inc(i);
continue;
end;
// RecordStretchingDelegate stored the per-sequence ID in LigatureID.
var StretchID := Glyph.LigatureID;
var SequenceStart := i;
// Scan forward for the end of the stretching sequence
while (i < AGlyphs.Count) do
begin
Glyph := AGlyphs[i];
if (not Glyph.IsStretched) or (Glyph.LigatureID <> StretchID) then
break;
Inc(i);
end;
// From here on, i is the index of the first glyph after the sequence.
var SequenceEnd := i - 1;
// Validate sequence length; We need at least a Start, an Extension, and an End,
// and the count must be odd
var SequenceCount := SequenceEnd - SequenceStart + 1;
if (SequenceCount < 3) or (SequenceCount and 1 = 0) then
// Not a valid stch sequence
// (i already points past the sequence, so the outer loop makes progress)
continue;
// Find the word after the sequence and calculate the width of it
var WordWidth := 0;
var j := i;
while (j < AGlyphs.Count) do
begin
Glyph := AGlyphs[j];
if (Glyph.IsStretched) or (not IsWord(Glyph.CodePoint)) then
break;
WordWidth := WordWidth + Glyph.XAdvance;
Inc(j);
end;
// No word (or a zero-width one) to stretch across; leave the sequence as is.
if (WordWidth <= 0) then
continue;
// Get the sequence glyph widths and temporarily save them in the glyphs.
// We will zero out the widths later, once we are done with them.
for j := SequenceStart to SequenceEnd do
begin
Glyph := AGlyphs[j];
Glyph.XAdvance := Font.GetAdvanceWidth(Glyph.GlyphID);
end;
(*
** Visual Left (Start) Visual Right (End)
**
** . . . .
** .<-----------.<------------.<-----------.
** . . . .
** +-----+ +-----+ +-----+ +-----+
** |Start| | Mid | | Mid | | End |
** +-----+ +-----+ +-----+ +-----+
** . . . .
** . . <------------------------------------ . .
** . . AlignWidth . .
** . .
** .<------------------------------------------->.
** . WordWidth .
** ^
** |
** +-- logical X = 0 (for LTR)
** |
** +-- logical X = WordWidth (for RTL)
**
** In the fonts we have investigated, the glyph order of the stretch sequence is:
**
** Index 0: End glyph
** Index Last: Start glyph
**
** The End glyph (Index 0) is always positioned to the visual right of the word.
** The Start glyph (Index Last) is always positioned to the visual left of the word.
**
** For example, for a 5 glyph sequence, the order is:
**
** 0. End (positioned at visual right)
** 1. Ext
** 2. Mid
** 3. Ext
** 4. Start (positioned at visual left)
*)
// Find the two points between which we will align all glyphs of the sequence.
// The offset of the points is half of the width of the start and end glyphs.
var OffsetStart := AGlyphs[SequenceEnd].XAdvance div 2;
var OffsetEnd := AGlyphs[SequenceStart].XAdvance div 2;
var AlignWidth := WordWidth - OffsetStart - OffsetEnd;
(*
** 1. Start with the easy part; Position the fixed glyphs.
*)
var NumFixed := (SequenceCount div 2) + 1; // How many fixed?
// SequenceCount >= 3 guarantees NumFixed >= 2, so (NumFixed-1) below is never zero.
ASSERT(NumFixed >= 2);
var BaseX := 0;
if (AGlyphs.Direction = dirRightToLeft) then
BaseX := -WordWidth;
for j := 0 to NumFixed-1 do
begin
// Divide the widths into NumFixed-1 parts. Place the glyph centered on the
// boundary of each part, starting from visual right (Index 0).
//
// OffsetStart
// . . .
// .----------->.------------>.
// . . .
// +-----+ +-----+ +-----+
// |Fixed| |Fixed| |Fixed|
// +-----+ +-----+ +-----+
// . .
// .<------------------------>.
// . AlignWidth .
//
var Index := SequenceStart + j * 2;
Glyph := AGlyphs[Index];
// NOTE(review): step 1 selects fixed glyphs by position within the sequence
// while step 2 selects extension glyphs by LigatureComponent parity. The two
// are only tied together by this assertion - confirm that nothing between
// the 'stch' stage and this delegate can break that relationship when
// assertions are disabled.
ASSERT(Glyph.LigatureComponent and 1 = 0);
var CenterP := (WordWidth - OffsetEnd) - AlignWidth * j div (NumFixed-1);
Glyph.XOffset := BaseX + CenterP - (Glyph.XAdvance div 2);
// Position the prior extension glyph (if there is one) so that it abuts the fixed glyph.
// Since we are moving from right to left, the extension glyph is to the visual
// right of the fixed glyph.
//
// Logical order Visual order
//
// [Index] [Index]
// [Index-1] | | [Index-1]
// | | | |
// v v v v
// +-----+ +-----++-----+ +-----++-----+ +-----+
// |Fixed|...| Ext ||Fixed| |Fixed|| Ext |...|Fixed|
// +-----+ +-----++-----+ +-----++-----+ +-----+
//
if (j > 0) then
AGlyphs[Index - 1].XOffset := Glyph.XOffset + Glyph.XAdvance;
// We're done with the width of this fixed glyph; Zero the advance
Glyph.XAdvance := 0;
end;
(*
** 2. Now, for each extension segment, duplicate the extension glyph to fill
** the space between the neighboring fixed glyphs.
**
** We iterate backward through the sequence to maintain index stability
** during insertion.
**
** https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-arabic-general.md#stage-4-applying-the-stch-feature
** "- All of the glyphs in the sequence for the mark, except for the final glyph (the "end" glyph),
** are repositioned as a group so that they precede the word...
** - The final glyph in the mark sequence is repositioned to the end of the word."
**
** Note that we do not follow this rule strictly. Instead we keep all the
** sequence glyphs at their original position, and instead offset them all
** horizontally so they align with the word.
** FWIW, Harfbuzz does the same.
*)
j := SequenceEnd;
while (j >= SequenceStart) do
begin
Glyph := AGlyphs[j];
if (Glyph.LigatureComponent and 1 = 1) then
begin
(*
** Repeating extension glyph.
*)
var GlyphWidth := Glyph.XAdvance;
// A zero-width extension glyph cannot fill anything (and would divide by zero below).
if (GlyphWidth = 0) then
begin
Dec(j);
continue;
end;
// The gap to fill is between the current extension glyph (at index j)
// and the fixed glyph to its visual right (at index j-1).
// Since we are moving right-to-left visually, Glyph[j-1].XOffset is
// the right edge of the gap, and Glyph[j].XOffset is the left edge.
//
// Logical order Visual order
//
// [j-1] [j] [j] [j-1]
// | | | |
// v v v v
// +-----++-----+ +-----++-----+ +-----++-----+ +-----++-----+
// | Ext ||Fixed|...| Ext ||Fixed| |Fixed|| Ext |...|Fixed|| Ext |
// +-----++-----+ +-----++-----+ +-----++-----+ +-----++-----+
// . .
// .<--Gap-->.
// . .
var GapWidth := AGlyphs[j-1].XOffset - Glyph.XOffset;
// How many extension glyphs do we need in total (rounded up)?
var NumExtensions := Max(0, (GapWidth + GlyphWidth - 1) div GlyphWidth);
// If the width of the extension glyphs exceed the available space, we
// make the extension glyphs overlap in order to distribute the extra
// space among them.
var Overlap := 0;
if (NumExtensions > 0) then
begin
var ExtensionWidth := GlyphWidth * NumExtensions;
// How much overlap for each glyph?
if (ExtensionWidth > GapWidth) then
Overlap := (ExtensionWidth - GapWidth) div NumExtensions;
end;
// Apply the overlap to the already existing extension glyph
Glyph.XOffset := Glyph.XOffset - Overlap;
// Adjust the advance width, that we will use to position the remaining
// extension glyphs, to take the overlap into account.
GlyphWidth := Glyph.XAdvance - Overlap;
// We're done with the width of this glyph; Zero the advance
Glyph.XAdvance := 0;
// x is the offset at which the next copy of the extension glyph is placed.
var x := Glyph.XOffset + GlyphWidth;
{$ifdef DEBUG_STCH}
Glyph.YOffset := 50;
{$endif DEBUG_STCH}
// Replicate the extension glyph and position the copies.
for var n := 1 to NumExtensions-1 do // -1 because we already have one
begin
// Every copy is inserted directly after the original, so the copies end up
// in reverse logical order; their placement is determined by XOffset alone.
var NewGlyph := AGlyphs.Insert(j + 1);
NewGlyph.Assign(Glyph);
// TODO : Should Assign() copy LigatureID and LigatureComponent ?
NewGlyph.LigatureID := Glyph.LigatureID;
NewGlyph.LigatureComponent := Glyph.LigatureComponent;
NewGlyph.XOffset := x;
Inc(x, GlyphWidth);
{$ifdef DEBUG_STCH}
if (n and 1 = 0) then
NewGlyph.YOffset := 50
else
NewGlyph.YOffset := -50;
{$endif DEBUG_STCH}
// Adjust sequence bounds after insert
// (i is the outer scan position; it must keep pointing past the grown sequence)
Inc(i);
end;
end;
Dec(j);
end;
end;
end;
// Chooses the joining-form feature (isol, init, medi, fina, and for Syriac
// fin2/fin3/med2) for every glyph and adds it to the glyph's PlanFeatures.
//
// The glyphs are run through the joining state machine (StateMachine) in
// logical order. Space and control characters (other than the join controls)
// end a word: the state machine is reset and, for Syriac, the end-of-word
// Alaph rule is applied to the word that just ended. Finally the chosen
// feature of each base glyph is also added to the transparent glyphs that
// follow it.
//
// AFeatures: passed on to the inherited implementation.
// AGlyphs:   the glyph string whose glyphs receive the plan features.
procedure TPascalTypeArabicShaper.AssignLocalFeatures(const AFeatures: TPascalTypeFeatures; var AGlyphs: TPascalTypeGlyphString);

  // Applies the Syriac end-of-word Alaph rule to the word that occupies the
  // glyph range [AWordStartIndex..AWordStopIndex]. Does nothing for other
  // scripts or for an empty range.
  //
  // The search for the word's last joining character is confined to the word
  // itself. Any earlier word was already handled when its own boundary was
  // reached, and the rule depends on code points only, so looking further back
  // could never change the outcome - it would merely rescan the whole text
  // for every space in a run of spaces or non-joining characters.
  procedure ApplySyriacAlaphRules(var Actions: TArray<TPascalTypeTag>; AWordStartIndex, AWordStopIndex: integer);
  var
    PrevIndex: integer;
    p: integer;
    ShapingClass: ArabicShapingClasses.TShapingClass;
  begin
    (*
    ** Stage 3: Computing letter joining states
    **
    **   "After testing the final character of the word, if the text is in <syrc>
    **   and if the last character that is not JOINING_TYPE_TRANSPARENT or
    **   JOINING_TYPE_NON_JOINING is "Alaph", perform an additional test:"
    **
    ** Reference: https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-arabic-general.md#stage-3-computing-letter-joining-states
    *)
    if (Script <> 'syrc') or (AWordStopIndex < AWordStartIndex) then
      exit;

    // Find the last character of the word that is not TRANSPARENT or NON_JOINING
    var AlaphIndex := -1;
    for p := AWordStopIndex downto AWordStartIndex do
    begin
      ShapingClass := GetShapingClass(AGlyphs[p].CodePoint);
      if (ShapingClass <> scTransparent) and (ShapingClass <> scNon_Joining) then
      begin
        if (ShapingClass = scALAPH) then
          AlaphIndex := p;
        break;
      end;
    end;

    if (AlaphIndex = -1) then
      exit;

    // Find the character preceding Alaph (skipping transparent).
    // This search deliberately is not confined to the word: it stops at the
    // first non-transparent character, whatever that is.
    PrevIndex := -1;
    for p := AlaphIndex - 1 downto 0 do
    begin
      ShapingClass := GetShapingClass(AGlyphs[p].CodePoint);
      if (ShapingClass <> scTransparent) then
      begin
        PrevIndex := p;
        break;
      end;
    end;

    if (PrevIndex <> -1) then
    begin
      // ShapingClass still holds the class of AGlyphs[PrevIndex]; the loop
      // above exits immediately after assigning PrevIndex.

      (*
      ** "If the preceding character is JOINING_TYPE_LEFT, tag the current character for fina"
      *)
      if (ShapingClass = scLeft_Joining) then
        Actions[AlaphIndex] := 'fina'
      else
      (*
      ** "If the preceding character's JOINING_GROUP is DALATH_RISH, tag the current character for fin3"
      *)
      if (ShapingClass = scDALATH_RISH) then
        Actions[AlaphIndex] := 'fin3'
      else
      (*
      ** "Otherwise, tag the current character for fin2"
      *)
        Actions[AlaphIndex] := 'fin2';
    end else
      // Nothing but transparent characters (or nothing at all) precedes Alaph
      Actions[AlaphIndex] := 'fin2';
  end;

var
  Actions: TArray<TPascalTypeTag>;
  StateEntry: TStateEntry;
begin
  inherited AssignLocalFeatures(AFeatures, AGlyphs);

  // Apply the state machine to map glyphs to features.
  // Actions[i] receives the feature chosen for glyph i (0 = none).
  SetLength(Actions, AGlyphs.Count);

  var State: TState := 0;
  var PreviousIndex := -1; // Last glyph that went through the state machine in this word
  var WordStart := 0;      // Index of the first glyph of the current word

  for var i := 0 to AGlyphs.Count-1 do
  begin
    var Glyph := AGlyphs[i];
    var CodePoint := Glyph.CodePoint;
    var ShapingClass := GetShapingClass(CodePoint);

    // Identify word boundaries for Syriac Alaph rules.
    // Space and control characters are considered word boundaries.
    // Note: Join control characters (ZWJ, ZWNJ) are excluded as they do not
    // represent word boundaries and must not reset the joining state.
    if (PascalTypeUnicode.IsSpace(CodePoint)) or
      (PascalTypeUnicode.IsControl(CodePoint) and (not PascalTypeUnicode.IsJoinControl(CodePoint))) then
    begin
      // The word that just ended spans [WordStart..i-1]
      ApplySyriacAlaphRules(Actions, WordStart, i - 1);
      WordStart := i + 1;

      State := 0;
      PreviousIndex := -1;
      Actions[i] := 0;
      continue;
    end;

    // Transparent glyphs do not take part in joining; they neither change the
    // state nor become the "previous" glyph.
    if (ShapingClass = scTransparent) then
    begin
      Actions[i] := 0;
      continue;
    end;

    StateEntry := StateMachine[State, ShapingClass];
    State := StateEntry.NextState;

    if (TPascalTypeTag(StateEntry.PreviousAction) <> 0) and (PreviousIndex <> -1) then
    begin
      // Join Controls (ZWJ/ZWNJ) should not receive joining features via PreviousAction.
      // Instead, they act as joining partners that influence the preceding glyph.
      if (not PascalTypeUnicode.IsJoinControl(AGlyphs[PreviousIndex].CodePoint)) then
        Actions[PreviousIndex] := StateEntry.PreviousAction;
    end;

    // Join Controls should never receive joining features themselves.
    // They are used to influence the state machine but are not themselves substituted.
    if (PascalTypeUnicode.IsJoinControl(CodePoint)) then
      Actions[i] := 0
    else
      Actions[i] := StateEntry.CurrentAction;

    PreviousIndex := i;
  end;

  // Final word in the string
  ApplySyriacAlaphRules(Actions, WordStart, AGlyphs.Count - 1);

  // Apply the chosen features to their respective glyphs.
  // Joining features (isol, fina, etc.) must be propagated from a base character
  // to any following transparent characters (marks, variation selectors)
  // to ensure that ligatures spanning across them match correctly.
  var Action: TPascalTypeTag := 0;
  for var i := 0 to AGlyphs.Count-1 do
  begin
    var CodePoint := AGlyphs[i].CodePoint;
    var ShapingClass := GetShapingClass(CodePoint);

    // A non-transparent glyph sets the current action; a transparent glyph
    // inherits the action of the nearest preceding non-transparent glyph.
    if (ShapingClass <> scTransparent) then
      Action := Actions[i];

    if (Action <> 0) then
      AGlyphs[i].PlanFeatures.Add(Action);
  end;
end;
// Post-delegate of the 'stch' substitution stage.
//
// Locates the glyph sequences that the 'stch' feature produced through a
// Multiple Substitution, marks every glyph of such a sequence as stretched and
// stamps the sequence with a ligature ID of its own. PositionStretchingDelegate
// uses these marks in the positioning phase.
//
// AProcessor: expected to be a TCustomPascalTypeOpenTypeProcessor in the
//             substitution phase (asserted).
// AGlyphs:    the glyph string; glyph flags and ligature IDs are modified.
// Returns:    always nil.
function TPascalTypeArabicShaper.RecordStretchingDelegate(AProcessor: TObject; var AGlyphs: TPascalTypeGlyphString): TTableNames;
begin
  // Harfbuzz:
  // https://github.com/harfbuzz/harfbuzz/blob/be79d5426553db9ce4074507c3a7c9afc175078d/src/hb-ot-shaper-arabic.cc#L453
  Result := nil;

  ASSERT(TCustomPascalTypeOpenTypeProcessor(AProcessor).Phase = ShapingPhaseSubstitution);

  var Cursor := 0;
  while (Cursor < AGlyphs.Count) do
  begin
    var Candidate := AGlyphs[Cursor];
    var FirstIndex := Cursor;
    Inc(Cursor);

    // Only glyphs produced by a Multiple Substitution are of interest.
    // TOpenTypeSubstitutionSubTableMultipleList sets IsMultiplied to True for
    // all but the first glyph of the substitution sequence. Glyphs that were
    // already tagged are skipped.
    if (Candidate.IsMultiplied) and (not Candidate.IsStretched) then
    begin
      // Candidate is the second or a later glyph of the substitution; walk
      // back to the first one (LigatureComponent = 0).
      while (FirstIndex > 0) and (AGlyphs[FirstIndex].LigatureComponent <> 0) do
        Dec(FirstIndex);

      // Only continue if the substitution really was caused by the 'stch' feature.
      if (AGlyphs[FirstIndex].PlanFeatures.Contains('stch')) then
      begin
        // Tell the GPOS post-processing stage that there is work to do
        FHasStretching := True;

        // All glyphs of the sequence share one fresh ligature ID
        var SequenceID := AGlyphs.GetNextLigatureID;

        var Scan := FirstIndex;
        while (Scan < AGlyphs.Count) do
        begin
          var Member := AGlyphs[Scan];

          // Ran into a sequence that has already been tagged
          if (Member.IsStretched) then
            break;

          // Either we are past the end of the sequence (-1), or the glyph is
          // the first component of an unrelated ligature (0 beyond the first).
          if (Member.LigatureComponent = -1) or
            ((Scan > FirstIndex) and (Member.LigatureComponent = 0)) then
            break;

          // Tag the glyph
          Member.IsStretched := True;
          Member.LigatureID := SequenceID;

          Inc(Scan);
        end;

        // Resume after the sequence that was just tagged
        if (Scan > Cursor) then
          Cursor := Scan;
      end;
    end;
  end;
end;
function TPascalTypeArabicShaper.ReorderMarksDelegate(AProcessor: TObject; var AGlyphs: TPascalTypeGlyphString): TTableNames;
(*
** Applies step 2 of AMTRA (Arabic Mark Transient Reordering Algorithm) to
** every run of two or more consecutive non-starter glyphs in AGlyphs.
**
** Reference: https://www.unicode.org/reports/tr53/
**
** AProcessor is not used. The glyphs are reordered in place and the
** function always returns nil.
*)

  // Reorders the non-starter sequence S = AGlyphs[AStart..AStop] (both inclusive).
  procedure ReorderRange(AStart, AStop: integer);

    // Steps 2b/2c: for every run of consecutive glyphs with the given canonical
    // combining class, move the leading MCM (Modifier Combining Mark) glyphs of
    // that run - and only those - to the beginning of S.
    // Glyphs of the run that follow the first non-MCM glyph stay where they are.
    // MCM groups taken from several runs keep their relative order.
    procedure MoveLeadingMCMs(ACombiningClass: integer);
    begin
      // Number of glyphs this pass has already placed at the front of S
      var MovedCount := 0;

      var j := AStart;
      while (j <= AStop) do
      begin
        if (PascalTypeUnicode.CanonicalCombiningClass(AGlyphs[j].CodePoint) = ACombiningClass) then
        begin
          var RunStart := j;

          // Advance j to the last glyph of the run
          while (j < AStop) and (PascalTypeUnicode.CanonicalCombiningClass(AGlyphs[j+1].CodePoint) = ACombiningClass) do
            Inc(j);

          // Count the MCM glyphs at the head of the run
          var LeadCount := 0;
          while (RunStart + LeadCount <= j) and (PascalTypeUnicode.IsModifierCombiningMark(AGlyphs[RunStart + LeadCount].CodePoint)) do
            Inc(LeadCount);

          // Move them, one at a time, behind the MCMs that were moved earlier.
          // Each move shifts the glyphs in between one position to the right,
          // so the next glyph to move is still found at RunStart + k.
          for var k := 0 to LeadCount - 1 do
            AGlyphs.Move(RunStart + k, AStart + MovedCount + k);

          Inc(MovedCount, LeadCount);
        end;

        // Positions after the run are not affected by the moves above
        Inc(j);
      end;
    end;

  begin
    // 2a. Move any Shadda characters (ccc=33) to the beginning of S.
    // Each Shadda is bubbled towards the front one position at a time and stops
    // behind any Shadda that is already there, so the relative order of all
    // other marks is preserved.
    for var i := AStart + 1 to AStop do
    begin
      if (PascalTypeUnicode.CanonicalCombiningClass(AGlyphs[i].CodePoint) = 33) then
      begin
        var k := i;
        while (k > AStart) and (PascalTypeUnicode.CanonicalCombiningClass(AGlyphs[k-1].CodePoint) <> 33) do
        begin
          AGlyphs.Move(k, k - 1);
          Dec(k);
        end;
      end;
    end;

    // 2b. If a sequence of ccc=230 characters begins with any MCM characters,
    //     move the sequence of such MCM characters to the beginning of S.
    MoveLeadingMCMs(PascalTypeUnicode.cccAbove);

    // 2c. If a sequence of ccc=220 characters begins with any MCM characters,
    //     move the sequence of such MCM characters to the beginning of S.
    // Because this runs last, the ccc=220 MCMs end up in front of the ccc=230
    // MCMs and the Shaddas.
    MoveLeadingMCMs(PascalTypeUnicode.cccBelow);
  end;

begin
  var i := 0;
  while (i < AGlyphs.Count) do
  begin
    // Find a sequence of non-starter characters
    // CGJ (U+034F) blocks reordering per TR#53.
    if (PascalTypeUnicode.CanonicalCombiningClass(AGlyphs[i].CodePoint) <> 0) and (AGlyphs[i].CodePoint <> PascalTypeUnicode.cpCGJ) then
    begin
      var RangeStart := i;

      // Extend the sequence while the next glyph is also a non-starter
      while (i + 1 < AGlyphs.Count) and (PascalTypeUnicode.CanonicalCombiningClass(AGlyphs[i+1].CodePoint) <> 0) and (AGlyphs[i+1].CodePoint <> PascalTypeUnicode.cpCGJ) do
        Inc(i);

      // A single mark has nothing to be reordered against
      if (i > RangeStart) then
        ReorderRange(RangeStart, i);
    end;

    Inc(i);
  end;

  Result := nil;
end;
function TPascalTypeArabicShaper.GetShapingClass(ACodePoint: TPascalTypeCodePoint): ArabicShapingClasses.TShapingClass;
begin
  // Returns the joining (shaping) class of a code point: first from the
  // ArabicShapingClasses trie, then from a few hard-coded special cases and
  // finally derived from the Unicode general category.

  // The trie is loaded on first use.
  if (not ArabicShapingClasses.Trie.Loaded) then
    ArabicShapingClasses.Load;

  // A trie entry wins, unless it is scUnassigned.
  if (ArabicShapingClasses.Trie.TryGetValue(ACodePoint, Result)) then
    if (Result <> scUnassigned) then
      Exit;

  // Fallback and Join Controls (ZWJ/ZWNJ)
  // According to the HarfBuzz and OpenType shaping models:
  // - ZWJ ($200D) must be treated as Dual_Joining (forcing joins)
  // - ZWNJ ($200C) must be treated as Non_Joining (breaking joins)
  // https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-arabic.md#special-function-codepoints
  // https://github.com/harfbuzz/harfbuzz/blob/main/src/hb-ot-shaper-arabic.cc
  if (ACodePoint = PascalTypeUnicode.cpZWJ) then
    Exit(scDual_Joining);

  if (ACodePoint = PascalTypeUnicode.cpZWNJ) then
    Exit(scNon_Joining);

  // Mongolian Variation Selectors (FVS1..FVS3 and MVS)
  // These are treated as scTransparent to allow joining across them.
  // https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-mongolian.md#terminology
  // https://github.com/harfbuzz/harfbuzz/blob/main/src/hb-ot-shaper-arabic.cc
  if (ACodePoint >= $180B) and (ACodePoint <= $180E) then
    Exit(scTransparent);

  // Everything else is classified by general category: variation selectors,
  // non-spacing marks, enclosing marks and format characters are transparent;
  // all remaining code points do not join.
  var Category := PascalTypeUnicode.GetCategory(ACodePoint);

  if (Category * [ccVariationSelector, ccMarkNonSpacing, ccMarkEnclosing, ccOtherFormat] <> []) then
    Result := scTransparent
  else
    Result := scNon_Joining;
end;
class function TPascalTypeArabicShaper.IsWord(ACodePoint: TPascalTypeCodePoint): boolean;
begin
  // Returns True when the code point is default-ignorable, unassigned, or
  // belongs to one of the general categories listed below.

  // Default-ignorable code points always qualify.
  Result := PascalTypeUnicode.IsDefaultIgnorableEx(ACodePoint);
  if (Result) then
    Exit;

  var Category := PascalTypeUnicode.GetCategory(ACodePoint);

  // A code point without ccAssigned is unassigned and qualifies as well;
  // otherwise the general category decides.
  Result := (not (ccAssigned in Category)) or
    (Category * [
      ccOtherPrivate,
      ccLetterModifier,
      ccLetterOther,
      ccMarkSpacingCombining,
      ccMarkEnclosing,
      ccMarkNonSpacing,
      ccNumberDecimalDigit,
      ccNumberLetter,
      ccNumberOther,
      ccSymbolCurrency,
      ccSymbolModifier,
      ccSymbolMath,
      ccSymbolOther
    ] <> []);
end;
function TPascalTypeArabicShaper.NeedUnicodeComposition: boolean;
begin
  // Always answers True.
  // NOTE(review): judging by the name, the caller uses this to decide whether
  // Unicode composition is applied for this shaper - confirm against the base class.
  Exit(True);
end;
initialization
(*
** Scripts registered to the Arabic shaper according to the OpenType specification.
** https://learn.microsoft.com/en-us/typography/opentype/spec/scripttags
*)
TPascalTypeShaper.RegisterShaperForScript('arab', TPascalTypeArabicShaper); // Arabic
TPascalTypeShaper.RegisterShaperForScript('mong', TPascalTypeArabicShaper); // Mongolian
TPascalTypeShaper.RegisterShaperForScript('syrc', TPascalTypeArabicShaper); // Syriac
TPascalTypeShaper.RegisterShaperForScript('nko ', TPascalTypeArabicShaper); // N'Ko
TPascalTypeShaper.RegisterShaperForScript('phag', TPascalTypeArabicShaper); // Phags Pa
TPascalTypeShaper.RegisterShaperForScript('mand', TPascalTypeArabicShaper); // Mandaic