Changeset 418 for trunk/Components


Ignore:
Timestamp:
Feb 25, 2019, 8:34:42 PM (6 years ago)
Author:
ataylor
Message:

Experimental new logic to try and fix DBCS text wrapping.

Location:
trunk/Components
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/Components/CanvasFontManager.pas

    r403 r418  
    5050    lMaxCharInc: LONG;
    5151    lMaxDescender: LONG;
     52    lEmInc: LONG;
    5253
    5354    destructor Destroy; override;
     
    103104    function AverageCharWidth: longint;
    104105    function MaximumCharWidth: longint;
    105 
     106    function CJKCharWidth: longint;         // ALT
     107    function CJKTextWidth(  const Length: longint; const S: PChar ): longint;
    106108    function IsFixed: boolean;
    107109
     
    391393      Font.lAveCharWidth := pfm^[ T ].lAveCharWidth;
    392394      Font.lMaxCharInc := pfm^[ T ].lMaxCharInc;
     395      Font.lEmInc := pfm^[ T ].lEmInc;
    393396
    394397      Font.ID := -1; // and always shall be so...
     
    825828  Result.lAveCharWidth := FontInfo.lAveCharWidth;
    826829  Result.lMaxCharInc := FontInfo.lMaxCharInc;
     830  Result.lEmInc := FontInfo.lEmInc;
    827831
    828832  // Set style flags
     
    10601064  FCurrentFont.lMaxCharInc := fm.lMaxCharInc;
    10611065  FCurrentFont.lMaxDescender := fm.lMaxDescender;
     1066  FCurrentFont.lEmInc := fm.lEmInc;
    10621067end;
    10631068
     
    11051110  Result := FCurrentFont.FixedWidth;
    11061111end;
     1112
     1113// ALT begins
     1114//
     1115// A 'default' average width for DBCS characters; probably not very accurate.
     1116// Should only be used as a fallback in case querying the actual string width
     1117// is impossible/fails.
     1118//
     1119function TCanvasFontManager.CJKCharWidth: longint;
     1120begin
     1121  EnsureMetricsLoaded;
     1122  if FCurrentFont.lMaxCharInc < FCurrentFont.lEmInc then
     1123    Result := FCurrentFont.lMaxCharInc * FontWidthPrecisionFactor
     1124  else
     1125    Result := FCurrentFont.lEmInc * FontWidthPrecisionFactor;
     1126end;
     1127
     1128// Get the render width of a CJK (Chinese/Japanese/Korean) character string.
     1129//
     1130function TCanvasFontManager.CJKTextWidth(  const Length: longint; const S: PChar ): longint;
     1131var
     1132  aptl: Array[ 0..TXTBOX_COUNT-1 ] Of PointL;
     1133begin
     1134  EnsureMetricsLoaded;
     1135  GpiQueryTextBox( FCanvas.Handle, Length, S^, TXTBOX_COUNT, aptl[0] );
     1136  // The pad of 2.1 per character was chosen through experimentation, since the
     1137  // actual returned value always seems too narrow. I have no idea why. (ALT)
     1138  Result := ( 2.1 + aptl[ TXTBOX_BOTTOMRIGHT ].x) * FontWidthPrecisionFactor;
     1139end;
     1140//
     1141// ALT ends
    11071142
    11081143procedure TCanvasFontManager.DrawString( Var Point: TPoint;
  • trunk/Components/RichTextDocumentUnit.pas

    r405 r418  
    3636  end;
    3737
    38   TTextElementType = ( teText, // a character
     38  TTextElementType = ( teText,              // a character
    3939                       teWordBreak,
    40                        teLineBreak, // end of para
     40                       teLineBreak,         // end of para
    4141                       teTextEnd,
    4242                       teImage,
    43                        teStyle );
     43                       teStyle,
     44                       teWrapChar,          // A non-whitespace wrappable character (ALT)
     45                       teLeadByte,          // DBCS lead byte (ALT)
     46                       teSecondByte );      // DBCS secondary byte (ALT)
    4447
    4548  TTextElement = record
     
    113116                                Buffer: PChar;
    114117                                BufferLength: longint ): longint;
     118
     119
     120// ALT begins
     121//
     122
     123// Check for special text element types and adjust value accordingly.
     124procedure CheckSpecialElementType( const Character:   Char;
     125                                   var   ElementType: TTextElementType;
     126                                   var   InsideDBC:   Boolean;
     127                                   const Codepage:    LongInt );
     128
     129// Returns true if the given byte value is a legally-wrappable single-byte
     130// character under the given Asian codepage.
     131function IsAsianWrapChar( const CharByte: Byte;
     132                          const Codepage: LongInt ): boolean;
     133
     134// Returns true if the given byte value is the leading byte of a multi-byte
     135// character under the given Asian codepage.
     136function IsDBCSLeadByte( const CharByte: Byte;
     137                         const Codepage: LongInt ): boolean;
     138
     139// Returns true if the given byte value is valid as a possible second byte of
     140// a multi-byte character (this does not guarantee that it IS one, just that
     141// it COULD be).
     142function IsDBCSSecondByte( const CharByte: Byte;
     143                           const Codepage: LongInt ): boolean;
     144
     145// Adjusts the character position to the beginning of any multi-byte character.
     146procedure MoveToCharacterBoundary(     TextPointer: PChar;
     147                                   var Index:       LongInt;
     148                                   var Offset:      LongInt;
     149                                       RowStart:    LongInt;
     150                                       Codepage:    LongInt );
     151//
     152// ALT ends
     153
    115154
    116155Implementation
     
    349388var
    350389  TheChar: Char;
    351   NextChar: char;
     390  NextChar: Char;
    352391begin
    353392  with Result do
     
    356395    Character := TheChar;
    357396    inc( TextPointer );
     397
    358398
    359399    case TheChar of
     
    404444      end;
    405445
     446//    '-': // ---- Hyphen (ALT)
     447//      ElementType := teWrapChar;
     448
    406449      else
    407450        ElementType := teText;
    408451    end;
     452
    409453  end; // with
     454
    410455  NextElement := TextPointer;
    411456end;
     
    485530        end;
    486531      end
     532
     533//    '-': // ---- Hyphen (ALT)
     534//      ElementType := teWrapChar;
     535
    487536      else
    488537        ElementType := teText;
     
    802851end;
    803852
     853// ALT begins
     854//
     855// Check for special text element types that depend on context.
     856//
     857procedure CheckSpecialElementType( const Character:   Char;
     858                                   var   ElementType: TTextElementType;
     859                                   var   InsideDBC:   Boolean;
     860                                   const Codepage:    LongInt );
     861var
     862  CharByte: Byte;
     863begin
     864  if Codepage in [ 874, 932, 936, 942, 943, 949, 950, 1381, 1386 ] then
     865  begin
     866    CharByte := ord( Character );
     867    if InsideDBC then
     868    begin
     869        InsideDBC := false;
     870        // sanity check for corrupt text sequence (definitely not foolproof)
     871        if IsDBCSSecondByte( CharByte, Codepage ) then
     872          ElementType := teSecondByte
     873        else
     874          ElementType := teText;
     875    end
     876    else
     877    begin
     878      if IsAsianWrapChar( CharByte, Codepage ) then
     879      begin
     880        ElementType := teWrapChar;
     881        InsideDBC := false;
     882      end
     883      else if IsDBCSLeadByte( CharByte, Codepage ) then
     884      begin
     885        ElementType := teLeadByte;
     886        InsideDBC := true;
     887      end;
     888    end;
     889  end;
     890end;
     891
     892// Check if this (single-byte) character is a legal wrap point under certain
     893// Asian codepages. This is really only used for Thai and for Japanese
     894// half-width katakana; other DBCS languages use double-byte characters for all
     895// their native glyphs.
     896//
     897function IsAsianWrapChar( const CharByte: Byte;
     898                          const Codepage: LongInt ): boolean;
     899begin
     900    Result := false;
     901
     902    if ( CharByte < $80) then
     903      exit;
     904
     905    case Codepage of
     906      932, 942, 943:        // Japanese
     907        if CharByte in [ $A2, $A6, $B1..$DD ] then
     908          Result := true;
     909      874:                  // Thai
     910        Result := true;
     911    end;
     912end;
     913
     914// Check if this is the lead byte of a double-byte character. This is essential
     915// to know in certain cases:
     916//  - Nothing must ever be inserted between such a byte and the next byte
     917//    (e.g. line break, tag, etc).
     918//  - Cursor position must never be set between such a byte and the next byte.
     919//  - Selection state must never change between such a byte and the next byte.
     920//
     921function IsDBCSLeadByte( const CharByte: Byte;
     922                         const Codepage: LongInt ): boolean;
     923begin
     924    Result := false;
     925
     926    case Codepage of
     927      932, 942, 943:        // Japanese
     928        if CharByte in [ $81..$9F, $E0..$FC ] then
     929          Result := true;
     930      949:                  // Korean KSC
     931        if CharByte in [ $85..$FE ] then
     932          Result := true;
     933      1381:                 // Chinese GB2312
     934        if CharByte in [ $8C..$FE ] then
     935          Result := true;
     936      936, 950, 1386:       // Chinese BIG-5 or GBK
     937        if CharByte in [ $81..$FE ] then
     938          Result := true;
     939    end;
     940end;
     941
     942// Check to see if this byte is a valid second byte in a double-byte character.
     943// (This doesn't guarantee that it IS such a byte, only that it COULD be. The
     944// caller is assumed to know whether we're in a double byte character or not.)
     945//
     946function IsDBCSSecondByte( const CharByte: Byte;
     947                           const Codepage: LongInt ): boolean;
     948begin
     949    Result := false;
     950
     951    case Codepage of
     952      932, 936, 942, 943, 949, 950, 1386:
     953        if CharByte >= $40 then
     954          Result := true;
     955      1381:
     956        if CharByte >= $A1 then
     957          Result := true;
     958    end;
     959end;
     960
     961// Given a string position, check to see if it's in the middle of a double-byte
     962// character; if so, move back by one position so that we're sitting immediately
     963// in front of the double-byte character instead.
     964//
     965procedure MoveToCharacterBoundary(     TextPointer: PChar;
     966                                   var Index:       LongInt;
     967                                   var Offset:      LongInt;
     968                                       RowStart:    LongInt;
     969                                       Codepage:    LongInt  );
     970var
     971  P:         PChar;
     972  NextP:     PChar;
     973  Element:   TTextElement;
     974  InsideDBC: boolean;
     975begin
     976  if ( Offset > 0 ) and
     977     ( Codepage in [ 932, 936, 942, 943, 949, 950, 1381, 1386 ]) then
     978  begin
     979    P := TextPointer + RowStart;
     980    InsideDBC := false;
     981
     982    // Because parsing of byte types is state based, we must verify every
     983    // byte's type from the beginning of the line until we reach the target.
     984    while RowStart < Index do
     985    begin
     986      Element := ExtractNextTextElement( P, NextP );
     987      CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );
     988      P := NextP;
     989      inc( RowStart );
     990    end;
     991{
     992    // We've reached the target position, and the current parsing state should
     993    // be correctly set. So now we can safely determine the target byte's type.
     994    Element := ExtractNextTextElement( P, NextP );
     995    CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );
     996}
     997    if InsideDBC then
     998    begin
     999      // If this the first byte of a double byte character, move back by one.
     1000      dec( Index );
     1001      dec( Offset );
     1002    end;
     1003  end;
     1004
     1005end;
     1006//
     1007// ALT ends
     1008
    8041009Initialization
    8051010End.
  • trunk/Components/RichTextLayoutUnit.pas

    r395 r418  
    7373
    7474    FRichTextSettings: TRichTextSettings;
     75
     76    Codepage:    ULong;       // ALT
    7577
    7678    // Drawing functions
     
    169171var
    170172  DefaultFontSpec: TFontSpec;
     173  CpSize:          ULong;       // ALT
    171174Begin
    172175  Inherited Create;
     
    199202                       DefaultFontSpec );
    200203  FFontManager.DefaultFontSpec := DefaultFontSpec;
     204
     205  DosQueryCp( sizeof( Codepage ), Codepage, CpSize );       // ALT
    201206
    202207  Layout;
     
    284289  NextP: PChar;
    285290  NextP2: PChar;
     291  NextP3: PChar;
    286292
    287293  WordStart: PChar;
     
    307313
    308314  DoWrap: boolean;
     315
     316  InsideDBC:   Boolean;     // ALT
    309317
    310318  // Nested procedure
     
    383391  WordStarted := false;
    384392  DisplayedCharsSinceFontChange := false;
     393  InsideDBC := false;
    385394
    386395  repeat
    387396    CurrentElement := ExtractNextTextElement( P, NextP );
    388397    assert( NextP > P );
     398    CheckSpecialElementType( CurrentElement.Character, CurrentElement.ElementType, InsideDBC, Codepage );   // ALT
    389399
    390400    OnBreak := false;
     
    395405        CurrentCharWidth := FFontManager.CharWidth( ' ' );
    396406        OnBreak := true;
     407        InsideDBC := false;
    397408      end;
    398409
     
    404415        WordStart := NextP;
    405416        WordX := 0;
     417        InsideDBC := false;
    406418
    407419        P := NextP;
     
    413425      begin
    414426        DoLine( P, NextP, WordStartX + WordX );
     427        InsideDBC := false;
    415428
    416429        // end of text, done
     
    446459        CurrentCharWidth := FFontManager.CharWidth( CurrentElement.Character );
    447460        WordStarted := true;
    448       end;
     461        InsideDBC := false;
     462      end;
     463
     464      // ALT begins
     465      //
     466      teWrapChar:
     467      begin
     468        // This is a legal break character, but not a space (so we still display it).
     469        CurrentCharWidth := FFontManager.CharWidth( CurrentElement.Character );
     470
     471        // Treat as the start of a new word (for the sake of wrapping).
     472        WordStarted := true;
     473        inc( WordStartX, WordX + CurrentCharWidth );
     474        WordX := 0;
     475        WordStart := NextP;
     476      end;
     477
     478      teLeadByte:
     479      begin
     480        // Leading byte of a double-byte character.
     481        // Get the complete character width for our wrapping calculations.
     482        if ( NextP > P ) then
     483          CurrentCharWidth := FFontManager.CJKTextWidth( 2, P )
     484        else
     485          CurrentCharWidth := FFontManager.CJKCharWidth;
     486        WordStarted := true;
     487      end;
     488
     489      teSecondByte:
     490      begin
     491        // Secondary byte of a double-byte character.
     492        // The full character width was already assigned to the leading byte.
     493        CurrentCharWidth := 0;
     494
     495        // We treat each double-byte character as a complete word for the sake
     496        // of the wrapping algorithm.
     497        inc( LineWordsCompleted );
     498        WordStarted := true;
     499        inc( WordStartX, WordX + CurrentCharWidth );
     500        WordX := 0;
     501        WordStart := NextP;
     502      end;
     503      //
     504      // ALT ends
    449505
    450506      teStyle:
    451507      begin
     508        InsideDBC := false;
    452509        case CurrentElement.Tag.TagType of
    453510          ttBeginLink:
     
    582639       + WordX
    583640       + CurrentCharWidth
    584        >= WrapX then
     641       >= WrapX  then
    585642    begin
    586643      // reached right hand side before finding end of word
     
    588645        // always wrap after at least one word displayed
    589646        DoWrap := true
    590       else if not FRichTextSettings.AtLeastOneWordBeforeWrap then
     647
     648      else if ( CurrentElement.ElementType = teWrapChar ) or
     649              ( CurrentElement.ElementType = teLeadByte ) then
     650        DoWrap := true                                                  // ALT
     651
     652      else if ( not FRichTextSettings.AtLeastOneWordBeforeWrap ) then
    591653        // only wrap during the first word, if the "at least 1 word" flag is not set.
    592654        DoWrap := true;
     
    606668
    607669          NextElement := ExtractNextTextElement( NextP, NextP2 );
     670
     671          // ALT
     672          if InsideDBC then
     673          begin
     674            // we're in the middle of a double-byte character, so keep the next byte too
     675            InsideDBC := false;
     676            NextP := NextP2;
     677            NextElement := ExtractNextTextElement( NextP2, NextP3 );
     678            NextP2 := NextP3;
     679          end;
     680          // /ALT
     681
    608682          if NextElement.ElementType <> teLineBreak then
    609683            // there is still more on the line...
     
    645719        CurrentLine.Wrapped := true;
    646720
    647         // take the width of the last space of the
    648         // previous word off the line width
    649         DoLine( WordStart, // current line ends at start of this word
    650                 WordStart, // next line starts at start of this word
    651                 WordStartX - FFontManager.CharWidth( ' ' ) );
    652         if CurrentElement.ElementType = teImage then
    653           if Bitmap <> nil then
    654             if BitmapHeight > CurrentLine.Height then
    655               CurrentLine.Height := BitmapHeight;
     721
     722        if ( CurrentElement.ElementType = teLeadByte ) or
     723           ( CurrentElement.ElementType = teWrapChar ) then     // ALT
     724        begin
     725          // draw up to but not including this 'word' (ALT)
     726          DoLine( WordStart,
     727                  WordStart,
     728                  WordStartX );
     729        end
     730        else
     731        begin                                                   // ALT
     732          // take the width of the last space of the
     733          // previous word off the line width
     734          DoLine( WordStart, // current line ends at start of this word
     735                  WordStart, // next line starts at start of this word
     736                  WordStartX - FFontManager.CharWidth( ' ' ) );
     737          if CurrentElement.ElementType = teImage then
     738            if Bitmap <> nil then
     739              if BitmapHeight > CurrentLine.Height then
     740                CurrentLine.Height := BitmapHeight;
     741        end;                                                    // ALT
    656742
    657743        // do NOT reset WordX to zero; as we are continuing
     
    727813  NewMarginX: longint;
    728814  StartedDrawing: boolean;
     815  InsideDBC: boolean;       // ALT
    729816begin
    730817  Line := FLines[ LineIndex ];
     
    736823
    737824  StartedDrawing := false;
     825  InsideDBC := false;       // ALT
    738826
    739827  Link := '';
     
    746834  begin
    747835    Element := ExtractNextTextElement( P, NextP );
     836    CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );   // ALT
    748837
    749838    case Element.ElementType of
    750839      teWordBreak,
    751840      teText,
     841      teLeadByte,           // ALT
     842      teWrapChar,           // ALT
    752843      teImage:
    753844      begin
     
    772863
    773864        // Now find out how wide the thing is
    774         inc( X, GetElementWidth( Element ) );
     865        if (( Element.ElementType = teLeadByte ) And ( EndP > P )) then    // ALT
     866          inc( X, FFontManager.CJKTextWidth( 2, P ))
     867        else
     868          inc( X, GetElementWidth( Element ) );
    775869
    776870        if X div FontWidthPrecisionFactor
     
    10231117    end;
    10241118
    1025     teText, teWordBreak:
     1119    teText, teWordBreak, teWrapChar:        // ALT
    10261120      Result := FFontManager.CharWidth( Element.Character );
     1121
     1122    teLeadByte:                             // ALT - should not be reached
     1123      Result := FFontManager.CJKCharWidth;
     1124
     1125    teSecondByte:                           // ALT
     1126      Result := 0;
    10271127
    10281128    else
  • trunk/Components/RichTextView.PAS

    r395 r418  
    459459
    460460Procedure TRichTextView.SetSelectionStartInternal( SelectionStart: longint );
     461var
     462  Offset: longint;     // ALT
    461463begin
    462464  if SelectionStart = FSelectionStart then
     
    468470      ClearSelection;
    469471
     472  // ALT
     473  Offset := FCursorOffset;
     474  MovetoCharacterBoundary( FText, SelectionStart, FCursorOffset, Offset, FLayout.Codepage );
     475
    470476  FSelectionStart := SelectionStart;
    471477  if FSelectionEnd = -1 then
     
    479485  StartRedrawLine: longint;
    480486  EndRedrawLine: longint;
     487  Offset: longint;         // ALT
    481488  OldClip: TRect;
    482489begin
    483490  if SelectionEnd = FSelectionEnd then
    484491    exit;
     492
     493  // ALT
     494  Offset := FCursorOffset;
     495  MovetoCharacterBoundary( FText, SelectionEnd, FCursorOffset, Offset, FLayout.Codepage );
    485496
    486497  if FSelectionStart = -1 then
     
    797808    // we haven't yet done a layout
    798809    Layout;
     810
    799811
    800812end;
     
    20182030                                           PreserveSelection: boolean );
    20192031var
     2032//  P: PChar;                 // ALT
     2033//  NextP: PChar;             // ALT
     2034//  Element: TTextElement;    // ALT
     2035//  InsideDBC: boolean;       // ALT
     2036  RowStart: longint;        // ALT
    20202037  Index: longint;
    20212038begin
    20222039  RemoveCursor;
     2040
     2041  Index := FLayout.GetCharIndex( FLayout.FLines[ Row ].Text ) + Offset;
     2042
     2043// ALT
     2044{
     2045  if ( Offset > 0 ) and
     2046     ( FLayout.Codepage in [ 932, 936, 942, 943, 949, 950, 1381, 1386 ]) then
     2047  begin
     2048    RowStart := FLayout.GetCharIndex( FLayout.FLines[ Row ].Text );
     2049    P := FText + RowStart;
     2050    InsideDBC := false;
     2051    while RowStart < Index do
     2052    begin
     2053      Element := ExtractNextTextElement( P, NextP );
     2054      CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, FLayout.Codepage );
     2055      P := NextP;
     2056      inc( RowStart );
     2057    end;
     2058    Element := ExtractNextTextElement( P, NextP );
     2059    CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, FLayout.Codepage );
     2060    if InsideDBC then
     2061    begin
     2062      dec( Index );
     2063      dec( Offset );
     2064    end;
     2065  end;
     2066}
     2067
     2068  RowStart := FLayout.GetCharIndex( FLayout.FLines[ Row ].Text );
     2069  MoveToCharacterBoundary( FText, Index, Offset, RowStart, FLayout.Codepage );  // ALT
     2070
    20232071  FCursorOffset := Offset;
    20242072  FCursorRow := Row;
    2025   Index := FLayout.GetCharIndex( FLayout.FLines[ Row ].Text ) + Offset;
     2073
    20262074  if PreserveSelection then
    20272075  begin
     
    20472095
    20482096  Element := ExtractNextTextElement( P, NextP );
     2097
    20492098  P := NextP;
    20502099  while Element.ElementType = teStyle do
Note: See TracChangeset for help on using the changeset viewer.