Changeset 418 for trunk/Components/RichTextDocumentUnit.pas
- Timestamp:
- Feb 25, 2019, 8:34:42 PM (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Components/RichTextDocumentUnit.pas
r405 r418 36 36 end; 37 37 38 TTextElementType = ( teText, // a character38 TTextElementType = ( teText, // a character 39 39 teWordBreak, 40 teLineBreak, // end of para40 teLineBreak, // end of para 41 41 teTextEnd, 42 42 teImage, 43 teStyle ); 43 teStyle, 44 teWrapChar, // A non-whitespace wrappable character (ALT) 45 teLeadByte, // DBCS lead byte (ALT) 46 teSecondByte ); // DBCS secondary byte (ALT) 44 47 45 48 TTextElement = record … … 113 116 Buffer: PChar; 114 117 BufferLength: longint ): longint; 118 119 120 // ALT begins 121 // 122 123 // Check for special text element types and adjust value accordingly. 124 procedure CheckSpecialElementType( const Character: Char; 125 var ElementType: TTextElementType; 126 var InsideDBC: Boolean; 127 const Codepage: LongInt ); 128 129 // Returns true if the given byte value is a legally-wrappable single-byte 130 // character under the given Asian codepage. 131 function IsAsianWrapChar( const CharByte: Byte; 132 const Codepage: LongInt ): boolean; 133 134 // Returns true if the given byte value is the leading byte of a multi-byte 135 // character under the given Asian codepage. 136 function IsDBCSLeadByte( const CharByte: Byte; 137 const Codepage: LongInt ): boolean; 138 139 // Returns true if the given byte value is valid as a possible second byte of 140 // a multi-byte character (this does not guarantee that it IS one, just that 141 // it COULD be). 142 function IsDBCSSecondByte( const CharByte: Byte; 143 const Codepage: LongInt ): boolean; 144 145 // Adjusts the character position to the beginning of any multi-byte character. 
146 procedure MoveToCharacterBoundary( TextPointer: PChar; 147 var Index: LongInt; 148 var Offset: LongInt; 149 RowStart: LongInt; 150 Codepage: LongInt ); 151 // 152 // ALT ends 153 115 154 116 155 Implementation … … 349 388 var 350 389 TheChar: Char; 351 NextChar: char;390 NextChar: Char; 352 391 begin 353 392 with Result do … … 356 395 Character := TheChar; 357 396 inc( TextPointer ); 397 358 398 359 399 case TheChar of … … 404 444 end; 405 445 446 // '-': // ---- Hyphen (ALT) 447 // ElementType := teWrapChar; 448 406 449 else 407 450 ElementType := teText; 408 451 end; 452 409 453 end; // with 454 410 455 NextElement := TextPointer; 411 456 end; … … 485 530 end; 486 531 end 532 533 // '-': // ---- Hyphen (ALT) 534 // ElementType := teWrapChar; 535 487 536 else 488 537 ElementType := teText; … … 802 851 end; 803 852
// ALT begins
//
// Check for special text element types that depend on context.
//
// Character:   the byte to classify.
// ElementType: updated in place to teWrapChar, teLeadByte, teSecondByte or
//              teText when the byte needs special handling; otherwise left
//              as passed in.
// InsideDBC:   parsing state carried from one call to the next.  True on
//              entry means the previous byte was classified as a lead byte.
//              It is set to true on exit only when this byte is classified
//              as a lead byte.
// Codepage:    only the codepages listed in the case statement below are
//              processed; for any other codepage nothing is modified.
//
procedure CheckSpecialElementType( const Character: Char;
                                   var ElementType: TTextElementType;
                                   var InsideDBC: Boolean;
                                   const Codepage: LongInt );
var
  CharByte: Byte;
begin
  // Codepage numbers are larger than 255, which is outside the ordinal range
  // of a Pascal set, so membership is tested with a case statement rather
  // than with "in [ ... ]".
  case Codepage of
    874, 932, 936, 942, 943, 949, 950, 1381, 1386:
    begin
      CharByte := ord( Character );
      if InsideDBC then
      begin
        // The previous byte was a lead byte, so this byte closes the
        // double-byte character whatever its value is.
        InsideDBC := false;
        // sanity check for corrupt text sequence (definitely not foolproof)
        // NOTE(review): the else branch overwrites whatever element type was
        // passed in with teText - confirm callers never rely on the incoming
        // type (e.g. a line break or text end) surviving in this case.
        if IsDBCSSecondByte( CharByte, Codepage ) then
          ElementType := teSecondByte
        else
          ElementType := teText;
      end
      else
      begin
        if IsAsianWrapChar( CharByte, Codepage ) then
        begin
          ElementType := teWrapChar;
          InsideDBC := false;
        end
        else if IsDBCSLeadByte( CharByte, Codepage ) then
        begin
          // The next call must treat its byte as the second half.
          ElementType := teLeadByte;
          InsideDBC := true;
        end;
      end;
    end;
  end;
end;

// Check if this (single-byte) character is a legal wrap point under certain
// Asian codepages.
// In practice this matters only for Thai and for Japanese half-width
// katakana; the other DBCS languages encode all of their native glyphs as
// double-byte characters.
//
// Returns false for every byte below $80 and for every codepage that is not
// listed in the case statement.
//
function IsAsianWrapChar( const CharByte: Byte;
                          const Codepage: LongInt ): boolean;
begin
  // Bytes below $80 are never reported as wrappable here.
  if CharByte < $80 then
  begin
    Result := false;
    exit;
  end;

  case Codepage of
    874:                 // Thai
      Result := true;
    932, 942, 943:       // Japanese
      Result := CharByte in [ $A2, $A6, $B1..$DD ];
  else
    Result := false;
  end;
end;

// Report whether a byte is the lead byte of a double-byte character under the
// given codepage.  Callers need this because:
//   - nothing (line break, tag, etc.) may ever be inserted between a lead
//     byte and the byte that follows it;
//   - the cursor position must never be set between the two bytes;
//   - the selection state must never change between the two bytes.
//
// Returns false for any codepage not listed in the case statement.
//
function IsDBCSLeadByte( const CharByte: Byte;
                         const Codepage: LongInt ): boolean;
begin
  case Codepage of
    932, 942, 943:       // Japanese
      Result := CharByte in [ $81..$9F, $E0..$FC ];
    949:                 // Korean KSC
      Result := CharByte in [ $85..$FE ];
    1381:                // Chinese GB2312
      Result := CharByte in [ $8C..$FE ];
    936, 950, 1386:      // Chinese BIG-5 or GBK
      Result := CharByte in [ $81..$FE ];
  else
    Result := false;
  end;
end;

// Check to see if this byte is a valid second byte in a double-byte character.
// (This doesn't guarantee that it IS such a byte, only that it COULD be.  The
// caller is assumed to know whether we're in a double byte character or not.)
//
// Returns false for any codepage not listed in the case statement.
//
function IsDBCSSecondByte( const CharByte: Byte;
                           const Codepage: LongInt ): boolean;
begin
  Result := false;

  case Codepage of
    932, 936, 942, 943, 949, 950, 1386:
      if CharByte >= $40 then
        Result := true;
    1381:
      if CharByte >= $A1 then
        Result := true;
  end;
end;

// Given a string position, check to see if it's in the middle of a double-byte
// character; if so, move back by one position so that we're sitting immediately
// in front of the double-byte character instead.
//
// TextPointer: start of the text; scanning begins at TextPointer + RowStart.
// Index:       position to check.  Decremented by one when it turns out to
//              sit on the second byte of a double-byte character.
// Offset:      decremented together with Index.  Nothing is done unless it
//              is greater than zero on entry.
// RowStart:    position from which the byte types are parsed (passed by
//              value, so the caller's copy is not changed).
// Codepage:    only the double-byte codepages listed in the case statement
//              below are processed; for any other codepage nothing happens.
//
procedure MoveToCharacterBoundary( TextPointer: PChar;
                                   var Index: LongInt;
                                   var Offset: LongInt;
                                   RowStart: LongInt;
                                   Codepage: LongInt );
var
  P: PChar;
  NextP: PChar;
  Element: TTextElement;
  InsideDBC: boolean;
begin
  if Offset <= 0 then
    exit;

  // Codepage numbers are larger than 255, which is outside the ordinal range
  // of a Pascal set, so membership is tested with a case statement rather
  // than with "in [ ... ]".
  case Codepage of
    932, 936, 942, 943, 949, 950, 1381, 1386:
    begin
      P := TextPointer + RowStart;
      InsideDBC := false;

      // Because parsing of byte types is state based, we must verify every
      // byte's type from the beginning of the line until we reach the target.
      // NOTE(review): RowStart advances by one per extracted element; this
      // assumes ExtractNextTextElement consumes exactly one byte per element
      // on this path - confirm for elements such as tags.
      while RowStart < Index do
      begin
        Element := ExtractNextTextElement( P, NextP );
        CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );
        P := NextP;
        inc( RowStart );
      end;
      {
      // We've reached the target position, and the current parsing state should
      // be correctly set.  So now we can safely determine the target byte's type.
      Element := ExtractNextTextElement( P, NextP );
      CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );
      }
      if InsideDBC then
      begin
        // The byte just before the target was a lead byte, so the target is
        // the second byte of a double-byte character: move back by one so we
        // sit in front of the whole character.
        dec( Index );
        dec( Offset );
      end;
    end;
  end;

end;
//
// ALT ends

Initialization
End.
Note:
See TracChangeset
for help on using the changeset viewer.