source: trunk/Components/RichTextDocumentUnit.pas@ 420

Last change on this file since 420 was 420, checked in by ataylor, 6 years ago

Improve DBCS string width calculations, other small tweaks.

  • Property svn:eol-style set to native
File size: 25.6 KB
Line 
1Unit RichTextDocumentUnit;
2
3// Declarations of tags, and parsing functions
4
5Interface
6
7uses
8 Classes;
9
10type
11 TTagType = ( ttInvalid,
12 ttBold, ttBoldOff,
13 ttItalic, ttItalicOff,
14 ttUnderline, ttUnderlineOff,
15 ttFixedWidthOn, ttFixedWidthOff,
16 ttHeading1, ttHeading2, ttHeading3, ttHeadingOff,
17 ttColor, ttColorOff,
18 ttBackgroundColor, ttBackgroundColorOff,
19 ttRed, ttBlue, ttGreen, ttBlack,
20 ttWrap,
21 ttAlign,
22 ttBeginLink, ttEndLink,
23 ttSetLeftMargin, ttSetRightMargin,
24 ttImage,
25 ttFont, ttFontOff,
26 ttEnd );
27
28 TStandardColor = record
29 Name: string[ 32 ];
30 Color: TColor;
31 end;
32
33 TTag = record
34 TagType: TTagType;
35 Arguments: string;
36 end;
37
38 TTextElementType = ( teText, // a character
39 teWordBreak,
40 teLineBreak, // end of para
41 teTextEnd,
42 teImage,
43 teStyle,
44 teWrapChar, // A non-whitespace wrappable character (ALT)
45 teLeadByte, // DBCS lead byte (ALT)
46 teSecondByte ); // DBCS secondary byte (ALT)
47
48 TTextElement = record
49 ElementType: TTextElementType;
50 Character: Char;
51 Tag: TTag;
52 end;
53
54 TTextAlignment = ( taLeft,
55 taRight,
56 taCenter );
57
58// Returns tag pointed to by TextPointer and
59// moves TextPointer to the first char after the tag.
60Function ExtractTag( Var TextPointer: PChar ): TTag;
61
62// Returns tag ending at TextPointer
63// (Expects textpointer is currently pointing at the >)
64// and moves TextPointer to the first char of the tag
65Function ExtractPreviousTag( const TextStart: PChar;
66 Var TextPointer: PChar ): TTag;
67
68function ExtractNextTextElement( TextPointer: PChar;
69 Var NextElement: PChar ): TTextElement;
70
71function ExtractPreviousTextElement( const TextStart: PChar;
72 TextPointer: PChar;
73 Var NextElement: PChar ): TTextElement;
74
75// Parse a color name or value (#hexval). Returns true if valid
76function GetTagColor( const ColorParam: string;
77 var Color: TColor ): boolean;
78
79function GetTagTextAlignment( const AlignParam: string;
80 const Default: TTextAlignment ): TTextAlignment;
81
82function GetTagTextWrap( const WrapParam: string ): boolean;
83
84// Search within a rich text document for the given text
85// if found, returns true, pMatch is set to the first match,
86// and MatchLength returns the length of the match
87// (which may be greater than the length of Text due to
88// to skipping tags)
89// if not found, returns false, pMatch is set to nil
90function RichTextFindString( pRichText: PChar;
91 const Text: string;
92 var pMatch: PChar;
93 var MatchLength: longint ): boolean;
94
95// Returns the start of the previous word,
96// or the current word if pStart is in the middle of the word
97function RichTextWordLeft( pRichText: PChar;
98 pStart: PChar ): PChar;
99
100// Returns the start of the next word.
101function RichTextWordRight( pStart: PChar ): PChar;
102
103// If pStart is in the middle of a word, then
104// returns true and sets the start and length of the word
105function RichTextWordAt( pRichText: PChar;
106 pStart: PChar;
107 Var pWordStart: PChar;
108 Var WordLength: longint ): boolean;
109
110// Copies plaintext of richtext starting at StartP
111// to the given buffer. Returns number of characters copied.
112// Buffer may be nil
113// If BufferLength is negative, it is effectively ignored
114function CopyPlainTextToBuffer( StartP: PChar;
115 EndP: PChar;
116 Buffer: PChar;
117 BufferLength: longint ): longint;
118
119
120// ALT begins
121//
122
123// Check for special text element types and adjust value accordingly.
124procedure CheckSpecialElementType( const Character: Char;
125 var ElementType: TTextElementType;
126 var InsideDBC: Boolean;
127 const Codepage: LongInt );
128
129// Returns true if the given byte value is a legally-wrappable single-byte
130// character under the given Asian codepage.
131function IsAsianWrapChar( const CharByte: Byte;
132 const Codepage: LongInt ): boolean;
133
134// Returns true if the given byte value is the leading byte of a multi-byte
135// character under the given Asian codepage.
136function IsDBCSLeadByte( const CharByte: Byte;
137 const Codepage: LongInt ): boolean;
138
139// Returns true if the given byte value is valid as a possible second byte of
140// a multi-byte character (this does not guarantee that it IS one, just that
141// it COULD be).
142function IsDBCSSecondByte( const CharByte: Byte;
143 const Codepage: LongInt ): boolean;
144
145// Adjusts the character position to the beginning or end of any multi-byte
146// character.
147procedure MoveToCharacterBoundary( TextPointer: PChar;
148 var Index: LongInt;
149 var Offset: LongInt;
150 RowStart: LongInt;
151 Codepage: LongInt;
152 Advance: Boolean; );
153//
154// ALT ends
155
156
157Implementation
158
159uses
160 BseDOS, // for NLS/case mapping
161 SysUtils,
162 ACLStringUtility;
163
164const
165 TagStr: array[ ttInvalid .. ttEnd ] of string =
166 (
167 '', //
168 'b',
169 '/b',
170 'i',
171 '/i',
172 'u',
173 '/u',
174 'tt',
175 '/tt',
176 'h1',
177 'h2',
178 'h3',
179 '/h',
180 'color',
181 '/color',
182 'backcolor',
183 '/backcolor',
184 'red',
185 'blue',
186 'green',
187 'black',
188 'wrap',
189 'align',
190 'link',
191 '/link',
192 'leftmargin',
193 'rightmargin',
194 'image',
195 'font',
196 '/font',
197 ''
198 );
199
200 StandardColors: array[ 0..7 ] of TStandardColor =
201 (
202 ( Name : 'white' ; Color: clWhite ),
203 ( Name : 'black' ; Color: clBlack ),
204 ( Name : 'red' ; Color: clRed ),
205 ( Name : 'blue' ; Color: clBlue ),
206 ( Name : 'green' ; Color: clLime ),
207 ( Name : 'purple'; Color: clFuchsia ),
208 ( Name : 'yellow'; Color: clYellow ),
209 ( Name : 'cyan' ; Color: clAqua )
210 );
211
212Procedure ParseTag( const Text: string;
213 Var Tag: TTag );
214var
215 TagType: TTagType;
216 TagTypeText: string;
217 SpacePos: longint;
218begin
219 SpacePos := Pos( ' ', Text );
220 if SpacePos <> 0 then
221 begin
222 Tag.Arguments := trim( Copy( Text, SpacePos + 1, 255 ) );
223 TagTypeText := LowerCase( Copy( Text, 1, SpacePos - 1 ) );
224 end
225 else
226 begin
227 Tag.Arguments := ''; // to save time copying when not needed
228 TagTypeText := LowerCase( Text );
229 end;
230
231 for TagType := ttBold to ttEnd do
232 begin
233 if TagStr[ TagType ] = TagTypeText then
234 begin
235 Tag.TagType := TagType;
236 exit;
237 end;
238 end;
239
240 // not found
241 Tag.TagType := ttInvalid;
242end;
243
244var
245 TagText: string;
246 TagArgText: string;
247
248Function ExtractTag( Var TextPointer: PChar ): TTag;
249var
250 CurrentChar: Char;
251 TagTooLong: boolean;
252 InQuote: boolean;
253begin
254// assert( TextPointer[ 0 ] = '<' );
255 TagText := '';
256 TagTooLong := false;
257 InQuote := false;
258
259 repeat
260 CurrentChar := TextPointer^;
261
262 if ( CurrentChar = '>' )
263 and ( not InQuote ) then
264 begin
265 // found tag end.
266 if TagTooLong then
267 Result.TagType := ttInvalid
268 else
269 ParseTag( TagText, Result );
270 inc( TextPointer );
271 exit;
272 end;
273
274 if CurrentChar = #0 then
275 begin
276 // if we reach here we have reached the end of text
277 // during a tag. invalid tag.
278 Result.TagType := ttInvalid;
279 exit;
280 end;
281
282 if CurrentChar = DoubleQuote then
283 begin
284 if not InQuote then
285 begin
286 InQuote := true
287 end
288 else
289 begin
290 // Could be escaped quote ""
291 if ( TextPointer + 1 ) ^ = DoubleQuote then
292 begin
293 // yes it is
294 inc( TextPointer ); // skip second one
295 end
296 else
297 begin
298 // no, not an escaped quote
299 InQuote := false;
300 end;
301 end;
302
303 end;
304
305 if not TagTooLong then
306 if Length( TagText ) < 200 then
307 TagText := TagText + CurrentChar
308 else
309 TagTooLong := true; // but keep going until the end
310
311 inc( TextPointer );
312 until false;
313
314end;
315
316// Expects textpointer is currently pointing at the >
317Function ExtractPreviousTag( const TextStart: PChar;
318 Var TextPointer: PChar ): TTag;
319var
320 CurrentChar: Char;
321 TagTooLong: boolean;
322 InQuote: boolean;
323begin
324 TagText := '';
325 TagTooLong := false;
326 InQuote := false;
327
328 repeat
329 dec( TextPointer );
330 if TextPointer < TextStart then
331 begin
332 // if we reach here we have reached the end of text
333 // during a tag. invalid tag.
334 Result.TagType := ttInvalid;
335 exit;
336 end;
337 CurrentChar := TextPointer^;
338
339 if ( CurrentChar = '<' )
340 and ( not InQuote ) then
341 begin
342 // found tag end.
343 if TagTooLong then
344 Result.TagType := ttInvalid
345 else
346 ParseTag( TagText, Result );
347 exit;
348 end;
349
350 if CurrentChar = DoubleQuote then
351 begin
352 if not InQuote then
353 begin
354 InQuote := true
355 end
356 else
357 begin
358 // Could be escaped quote ""
359 if TextPointer <= TextStart then
360 begin
361 // start of text... somethin weird
362 InQuote := false;
363 end
364 else if ( TextPointer - 1 ) ^ = DoubleQuote then
365 begin
366 // yes it is
367 dec( TextPointer ); // skip second one
368 end
369 else
370 begin
371 // no, not an escaped quote
372 InQuote := false;
373 end;
374 end;
375
376 end;
377
378 if not TagTooLong then
379 if Length( TagText ) < 200 then
380 TagText := CurrentChar + TagText
381 else
382 TagTooLong := true; // but keep going until the end
383
384 until false;
385
386end;
387
388function ExtractNextTextElement( TextPointer: PChar;
389 Var NextElement: PChar ): TTextElement;
390var
391 TheChar: Char;
392 NextChar: Char;
393begin
394 with Result do
395 begin
396 TheChar := TextPointer^;
397 Character := TheChar;
398 inc( TextPointer );
399
400
401 case TheChar of
402 ' ': // ---- Space (word break) found ----
403 ElementType := teWordBreak;
404
405 #10, #13: // ---- End of line found ----
406 begin
407 ElementType := teLineBreak;
408 if TheChar = #13 then
409 begin
410 TheChar := TextPointer^;
411 if TheChar = #10 then
412 // skip CR following LF
413 inc( TextPointer );
414 end;
415 end;
416
417 #0: // ---- end of text found ----
418 ElementType := teTextEnd;
419
420 '<': // ---- tag found? ----
421 begin
422 NextChar := TextPointer^;
423 if NextChar = '<' then
424 begin
425 // no. just a literal <
426 ElementType := teText;
427 inc( TextPointer );
428 end
429 else
430 begin
431 Tag := ExtractTag( TextPointer );
432 if Tag.TagType = ttImage then
433 ElementType := teImage
434 else
435 ElementType := teStyle;
436 end;
437
438 end;
439
440 '>': // check - should be double
441 begin
442 ElementType := teText;
443 NextChar := TextPointer^;
444 if NextChar = '>' then
445 inc( TextPointer );
446 end;
447
448// '-': // ---- Hyphen (ALT)
449// ElementType := teWrapChar;
450
451 else
452 ElementType := teText;
453 end;
454
455 end; // with
456
457 NextElement := TextPointer;
458end;
459
460function ExtractPreviousTextElement( const TextStart: PChar;
461 TextPointer: PChar;
462 Var NextElement: PChar ): TTextElement;
463var
464 TheChar: Char;
465 PreviousChar: Char;
466 FoundTag: boolean;
467begin
468 with Result do
469 begin
470 dec( TextPointer );
471 TheChar := TextPointer^;
472 Character := TheChar;
473 if TextPointer < TextStart then
474 begin
475 ElementType := teTextEnd;
476 exit;
477 end;
478
479 case TheChar of
480 ' ': // ---- Space (word break) found ----
481 ElementType := teWordBreak;
482
483 #10, #13: // ---- End of line found ----
484 begin
485 ElementType := teLineBreak;
486 if TheChar = #10 then
487 begin
488 dec( TextPointer );
489 TheChar := TextPointer^;
490 if TheChar = #13 then
491 begin
492 // skip CR preceeding LF
493 end
494 else
495 inc( TextPointer );
496 end;
497 end;
498
499 '>': // ---- tag found ----
500 begin
501 FoundTag := true;
502 if TextPointer > TextStart then
503 begin
504 PreviousChar := ( TextPointer - 1 )^;
505 if PreviousChar = '>' then
506 begin
507 // no. just a literal >
508 FoundTag := false;
509 ElementType := teText;
510 dec( TextPointer );
511 end
512 end;
513
514 if FoundTag then
515 begin
516 Tag := ExtractPreviousTag( TextStart, TextPointer );
517 if Tag.TagType = ttImage then
518 ElementType := teImage
519 else
520 ElementType := teStyle;
521 end;
522 end;
523
524 '<': // should be double
525 begin
526 ElementType := teText;
527 if TextPointer > TextStart then
528 begin
529 PreviousChar := TextPointer^;
530 if PreviousChar = '<' then
531 dec( TextPointer );
532 end;
533 end
534
535// '-': // ---- Hyphen (ALT)
536// ElementType := teWrapChar;
537
538 else
539 ElementType := teText;
540 end;
541 end; // with
542 NextElement := TextPointer;
543end;
544
545function GetTagColor( const ColorParam: string;
546 var Color: TColor ): boolean;
547var
548 ColorIndex: longint;
549begin
550 Result := false;
551 if ColorParam <> '' then
552 begin
553 if ColorParam[ 1 ] = '#' then
554 begin
555 try
556 Color := HexToInt( StrRightFrom( ColorParam, 2 ) );
557 Result := true;
558 except
559 end;
560 end
561 else
562 begin
563 for ColorIndex := 0 to High( StandardColors ) do
564 begin
565 if StringsSame( ColorParam, StandardColors[ ColorIndex ].Name ) then
566 begin
567 Color := StandardColors[ ColorIndex ].Color;
568 Result := true;
569 break;
570 end;
571 end;
572 end;
573 end;
574end;
575
576function GetTagTextAlignment( const AlignParam: string;
577 const Default: TTextAlignment ): TTextAlignment;
578begin
579 if StringsSame( AlignParam, 'left' ) then
580 Result := taLeft
581 else if StringsSame( AlignParam, 'center' ) then
582 Result := taCenter
583 else if StringsSame( AlignParam, 'right' ) then
584 Result := taRight
585 else
586 Result := Default;
587end;
588
589function GetTagTextWrap( const WrapParam: string ): boolean;
590begin
591 Result := StringsSame( WrapParam, 'yes' );
592end;
593
594function RichTextFindString( pRichText: PChar;
595 const Text: string;
596 var pMatch: PChar;
597 var MatchLength: longint ): boolean;
598var
599 P: PChar;
600 NextP: PChar;
601 Element: TTextElement;
602 pMatchStart: pchar;
603 pMatchStartNext: pchar;
604 MatchIndex: longint;
605
606 CountryData: COUNTRYCODE;
607 CaseMap: array[ Low( Char )..High( Char ) ] of char;
608 C: Char;
609begin
610 if Length( Text ) = 0 then
611 begin
612 // null string always matches
613 Result := true;
614 pMatch := pRichText;
615 MatchLength := 0;
616 exit;
617 end;
618
619 P := pRichText;
620
621 MatchIndex := 1;
622
623 // Get case mapping of all chars (only SBCS)
624
625 CountryData.Country := 0; // default country
626 CountryData.CodePage := 0; // default codepage
627
628 // fill array with all chars
629 for C := Low( CaseMap ) to High( CaseMap ) do
630 CaseMap[ C ] := C;
631
632 DosMapCase( sizeof( CaseMap ),
633 CountryData,
634 CaseMap );
635
636 // Now search, case insensitively
637
638 while true do
639 begin
640 Element := ExtractNextTextElement( P, NextP );
641
642 case Element.ElementType of
643 teTextEnd:
644 // end of text
645 break;
646
647 teImage,
648 teLineBreak:
649 // breaks a potential match
650 MatchIndex := 1;
651
652 teStyle:
653 ; // ignore, matches can continue
654
655 else
656 begin
657 if CaseMap[ Element.Character ]
658 = CaseMap[ Text[ MatchIndex ] ] then
659 begin
660 // found a match
661 if MatchIndex = 1 then
662 begin
663 pMatchStart := P; // store start of match
664 pMatchStartNext := NextP;
665 end;
666
667 inc( MatchIndex );
668 if MatchIndex > Length( Text ) then
669 begin
670 // found a complete match
671 Result := true;
672 pMatch := pMatchStart;
673 MatchLength := PCharDiff( P, pMatchStart )
674 + 1; // include this char
675 exit;
676 end;
677 end
678 else
679 begin
680 // not a match
681 if MatchIndex > 1 then
682 begin
683 // go back to start of match, + 1
684 NextP := pMatchStartNext;
685 MatchIndex := 1;
686 end;
687 end;
688 end;
689 end;
690
691 P := NextP;
692 end;
693
694 // no match found
695 Result := false;
696 pMatch := nil;
697 MatchLength := 0;
698end;
699
700function RichTextWordLeft( pRichText: PChar;
701 pStart: PChar ): PChar;
702Var
703 P: PChar;
704 NextP: PChar;
705 Element: TTextElement;
706begin
707 P := pStart;
708
709 // skip whitespace/tags...
710 Element := ExtractPreviousTextElement( pRichText, P, NextP );
711 P := NextP;
712 while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
713 begin
714 Element := ExtractPreviousTextElement( pRichText, P, NextP );
715 P := NextP;
716 end;
717 if Element.ElementType = teTextEnd then
718 begin
719 Result := P;
720 // out of text
721 exit;
722 end;
723
724 // back to start of word, skip text/tags
725 while true do
726 begin
727 Element := ExtractPreviousTextElement( pRichText, P, NextP );
728 if not ( Element.ElementType in [ teText, teStyle ] ) then
729 break;
730 P := NextP;
731 end;
732 Result := P;
733end;
734
735function RichTextWordRight( pStart: PChar ): PChar;
736Var
737 P: PChar;
738 NextP: PChar;
739 Element: TTextElement;
740begin
741 P := pStart;
742
743 // skip text/tags...
744 Element := ExtractNextTextElement( P, NextP );
745 while Element.ElementType in [ teStyle, teText ] do
746 begin
747 P := NextP;
748 Element := ExtractNextTextElement( P, NextP );
749 end;
750 if Element.ElementType <> teTextEnd then
751 begin
752 // skip whitespace
753 Element := ExtractNextTextElement( P, NextP );
754 while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
755 begin
756 P := NextP;
757 Element := ExtractNextTextElement( P, NextP );
758 end;
759 end;
760
761 Result := P;
762end;
763
764function RichTextWordAt( pRichText: PChar;
765 pStart: PChar;
766 Var pWordStart: PChar;
767 Var WordLength: longint ): boolean;
768Var
769 P: PChar;
770 NextP: PChar;
771 Element: TTextElement;
772 pWordEnd: PChar;
773begin
774 P := pStart;
775 Element := ExtractNextTextElement( P, NextP );
776 if not ( Element.ElementType in [ teStyle, teText ] ) then
777 begin
778 // not in a word.
779 result := false;
780 pWordStart := nil;
781 WordLength := 0;
782 exit;
783 end;
784 // find end of the word
785 while Element.ElementType in [ teStyle, teText ] do
786 begin
787 P := NextP;
788 Element := ExtractNextTextElement( P, NextP );
789 end;
790 pWordEnd := P;
791
792 P := pStart;
793 Element := ExtractPreviousTextElement( pRichText, P, NextP );
794 while Element.ElementType in [ teStyle, teText ] do
795 begin
796 P := NextP;
797 Element := ExtractPreviousTextElement( pRichText, P, NextP );
798 end;
799 pWordStart := P;
800 WordLength := PCharDiff( pWordEnd, pWordStart );
801 Result := true;
802end;
803
804function CopyPlainTextToBuffer( StartP: PChar;
805 EndP: PChar;
806 Buffer: PChar;
807 BufferLength: longint ): longint;
808var
809 Q: PChar;
810 EndQ: Pchar;
811 P: PChar;
812 NextP: PChar;
813 Element: TTextElement;
814begin
815 P := StartP;
816 Q := Buffer;
817 EndQ := Buffer + BufferLength;
818
819 while P < EndP do
820 begin
821 Element := ExtractNextTextElement( P, NextP );
822 case Element.ElementType of
823 teText, teWordBreak:
824 begin
825 // copy char
826 if Buffer <> nil then
827 Q[ 0 ] := Element.Character;
828 inc( Q );
829 end;
830
831 teLineBreak:
832 begin
833 if Buffer <> nil then
834 Q[ 0 ] := #13;
835 inc( Q );
836 if Q = EndQ then
837 // end of buffer
838 break;
839
840 if Buffer <> nil then
841 Q[ 0 ] := #10;
842 inc( Q );
843 end;
844 end;
845
846 if Q = EndQ then
847 // end of buffer
848 break;
849
850 P := NextP;
851 end;
852 result := PCharDiff( Q, Buffer );
853end;
854
855// ALT begins
856//
857// Check for special text element types that depend on context.
858//
859procedure CheckSpecialElementType( const Character: Char;
860 var ElementType: TTextElementType;
861 var InsideDBC: Boolean;
862 const Codepage: LongInt );
863var
864 CharByte: Byte;
865begin
866 if Codepage in [ 874, 932, 936, 942, 943, 949, 950, 1381, 1386 ] then
867 begin
868 CharByte := ord( Character );
869 if InsideDBC then
870 begin
871 InsideDBC := false;
872 // sanity check for corrupt text sequence (definitely not foolproof)
873 if IsDBCSSecondByte( CharByte, Codepage ) then
874 ElementType := teSecondByte
875 else
876 ElementType := teText;
877 end
878 else
879 begin
880 if IsAsianWrapChar( CharByte, Codepage ) then
881 begin
882 ElementType := teWrapChar;
883 InsideDBC := false;
884 end
885 else if IsDBCSLeadByte( CharByte, Codepage ) then
886 begin
887 ElementType := teLeadByte;
888 InsideDBC := true;
889 end;
890 end;
891 end;
892end;
893
894// Check if this (single-byte) character is a legal wrap point under certain
895// Asian codepages. This is really only used for Thai and for Japanese
896// half-width katakana; other DBCS languages use double-byte characters for all
897// their native glyphs.
898//
899function IsAsianWrapChar( const CharByte: Byte;
900 const Codepage: LongInt ): boolean;
901begin
902 Result := false;
903
904 if ( CharByte < $80) then
905 exit;
906
907 case Codepage of
908 932, 942, 943: // Japanese
909 if CharByte in [ $A2, $A6, $B1..$DD ] then
910 Result := true;
911 874: // Thai
912 Result := true;
913 end;
914end;
915
916// Check if this is the lead byte of a double-byte character. This is essential
917// to know in certain cases:
918// - Nothing must ever be inserted between such a byte and the next byte
919// (e.g. line break, tag, etc).
920// - Cursor position must never be set between such a byte and the next byte.
921// - Selection state must never change between such a byte and the next byte.
922//
923function IsDBCSLeadByte( const CharByte: Byte;
924 const Codepage: LongInt ): boolean;
925begin
926 Result := false;
927
928 case Codepage of
929 932, 942, 943: // Japanese
930 if CharByte in [ $81..$9F, $E0..$FC ] then
931 Result := true;
932 949: // Korean KSC
933 if CharByte in [ $85..$FE ] then
934 Result := true;
935 1381: // Chinese GB2312
936 if CharByte in [ $8C..$FE ] then
937 Result := true;
938 936, 950, 1386: // Chinese BIG-5 or GBK
939 if CharByte in [ $81..$FE ] then
940 Result := true;
941 end;
942end;
943
944// Check to see if this byte is a valid second byte in a double-byte character.
945// (This doesn't guarantee that it IS such a byte, only that it COULD be. The
946// caller is assumed to know whether we're in a double byte character or not.)
947//
948function IsDBCSSecondByte( const CharByte: Byte;
949 const Codepage: LongInt ): boolean;
950begin
951 Result := false;
952
953 case Codepage of
954 932, 936, 942, 943, 949, 950, 1386:
955 if CharByte >= $40 then
956 Result := true;
957 1381:
958 if CharByte >= $A1 then
959 Result := true;
960 end;
961end;
962
963// Given a string position, check to see if it's in the middle of a double-byte
964// character; if so, move back by one position so that we're sitting immediately
965// in front of the double-byte character instead.
966//
967procedure MoveToCharacterBoundary( TextPointer: PChar;
968 var Index: LongInt;
969 var Offset: LongInt;
970 RowStart: LongInt;
971 Codepage: LongInt;
972 Advance: Boolean; );
973var
974 P: PChar;
975 NextP: PChar;
976 Element: TTextElement;
977 InsideDBC: boolean;
978begin
979 if ( Offset > 0 ) and
980 ( Codepage in [ 932, 936, 942, 943, 949, 950, 1381, 1386 ]) then
981 begin
982 P := TextPointer + RowStart;
983 InsideDBC := false;
984
985 // Because parsing of byte types is state based, we must verify every
986 // byte's type from the beginning of the line until we reach the target.
987 while RowStart < Index do
988 begin
989 Element := ExtractNextTextElement( P, NextP );
990 CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );
991 P := NextP;
992 inc( RowStart );
993 end;
994
995 // We've reached the target position, and the current parsing state should
996 // be correctly set. So now we can safely determine the target byte's type.
997 Element := ExtractNextTextElement( P, NextP );
998 CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );
999 if InsideDBC then
1000 begin
1001 // If this the first byte of a double byte character, move position by one.
1002 if Advance Then
1003 Begin
1004 inc( Index );
1005 inc( Offset );
1006 End
1007 Else
1008 Begin
1009 dec( Index );
1010 dec( Offset );
1011 End
1012 end;
1013 end;
1014
1015end;
1016//
1017// ALT ends
1018
1019Initialization
1020End.
Note: See TracBrowser for help on using the repository browser.