source: trunk/Components/RichTextDocumentUnit.pas@ 418

Last change on this file since 418 was 418, checked in by ataylor, 6 years ago

Experimental new logic to try and fix DBCS text wrapping.

  • Property svn:eol-style set to native
File size: 25.4 KB
Line 
1Unit RichTextDocumentUnit;
2
3// Declarations of tags, and parsing functions
4
5Interface
6
7uses
8 Classes;
9
10type
11 TTagType = ( ttInvalid,
12 ttBold, ttBoldOff,
13 ttItalic, ttItalicOff,
14 ttUnderline, ttUnderlineOff,
15 ttFixedWidthOn, ttFixedWidthOff,
16 ttHeading1, ttHeading2, ttHeading3, ttHeadingOff,
17 ttColor, ttColorOff,
18 ttBackgroundColor, ttBackgroundColorOff,
19 ttRed, ttBlue, ttGreen, ttBlack,
20 ttWrap,
21 ttAlign,
22 ttBeginLink, ttEndLink,
23 ttSetLeftMargin, ttSetRightMargin,
24 ttImage,
25 ttFont, ttFontOff,
26 ttEnd );
27
28 TStandardColor = record
29 Name: string[ 32 ];
30 Color: TColor;
31 end;
32
33 TTag = record
34 TagType: TTagType;
35 Arguments: string;
36 end;
37
38 TTextElementType = ( teText, // a character
39 teWordBreak,
40 teLineBreak, // end of para
41 teTextEnd,
42 teImage,
43 teStyle,
44 teWrapChar, // A non-whitespace wrappable character (ALT)
45 teLeadByte, // DBCS lead byte (ALT)
46 teSecondByte ); // DBCS secondary byte (ALT)
47
48 TTextElement = record
49 ElementType: TTextElementType;
50 Character: Char;
51 Tag: TTag;
52 end;
53
54 TTextAlignment = ( taLeft,
55 taRight,
56 taCenter );
57
58// Returns tag pointed to by TextPointer and
59// moves TextPointer to the first char after the tag.
60Function ExtractTag( Var TextPointer: PChar ): TTag;
61
62// Returns tag ending at TextPointer
63// (Expects textpointer is currently pointing at the >)
64// and moves TextPointer to the first char of the tag
65Function ExtractPreviousTag( const TextStart: PChar;
66 Var TextPointer: PChar ): TTag;
67
68function ExtractNextTextElement( TextPointer: PChar;
69 Var NextElement: PChar ): TTextElement;
70
71function ExtractPreviousTextElement( const TextStart: PChar;
72 TextPointer: PChar;
73 Var NextElement: PChar ): TTextElement;
74
75// Parse a color name or value (#hexval). Returns true if valid
76function GetTagColor( const ColorParam: string;
77 var Color: TColor ): boolean;
78
79function GetTagTextAlignment( const AlignParam: string;
80 const Default: TTextAlignment ): TTextAlignment;
81
82function GetTagTextWrap( const WrapParam: string ): boolean;
83
84// Search within a rich text document for the given text
85// if found, returns true, pMatch is set to the first match,
86// and MatchLength returns the length of the match
87// (which may be greater than the length of Text due to
88// to skipping tags)
89// if not found, returns false, pMatch is set to nil
90function RichTextFindString( pRichText: PChar;
91 const Text: string;
92 var pMatch: PChar;
93 var MatchLength: longint ): boolean;
94
95// Returns the start of the previous word,
96// or the current word if pStart is in the middle of the word
97function RichTextWordLeft( pRichText: PChar;
98 pStart: PChar ): PChar;
99
100// Returns the start of the next word.
101function RichTextWordRight( pStart: PChar ): PChar;
102
103// If pStart is in the middle of a word, then
104// returns true and sets the start and length of the word
105function RichTextWordAt( pRichText: PChar;
106 pStart: PChar;
107 Var pWordStart: PChar;
108 Var WordLength: longint ): boolean;
109
110// Copies plaintext of richtext starting at StartP
111// to the given buffer. Returns number of characters copied.
112// Buffer may be nil
113// If BufferLength is negative, it is effectively ignored
114function CopyPlainTextToBuffer( StartP: PChar;
115 EndP: PChar;
116 Buffer: PChar;
117 BufferLength: longint ): longint;
118
119
120// ALT begins
121//
122
123// Check for special text element types and adjust value accordingly.
124procedure CheckSpecialElementType( const Character: Char;
125 var ElementType: TTextElementType;
126 var InsideDBC: Boolean;
127 const Codepage: LongInt );
128
129// Returns true if the given byte value is a legally-wrappable single-byte
130// character under the given Asian codepage.
131function IsAsianWrapChar( const CharByte: Byte;
132 const Codepage: LongInt ): boolean;
133
134// Returns true if the given byte value is the leading byte of a multi-byte
135// character under the given Asian codepage.
136function IsDBCSLeadByte( const CharByte: Byte;
137 const Codepage: LongInt ): boolean;
138
139// Returns true if the given byte value is valid as a possible second byte of
140// a multi-byte character (this does not guarantee that it IS one, just that
141// it COULD be).
142function IsDBCSSecondByte( const CharByte: Byte;
143 const Codepage: LongInt ): boolean;
144
145// Adjusts the character position to the beginning of any multi-byte character.
146procedure MoveToCharacterBoundary( TextPointer: PChar;
147 var Index: LongInt;
148 var Offset: LongInt;
149 RowStart: LongInt;
150 Codepage: LongInt );
151//
152// ALT ends
153
154
155Implementation
156
157uses
158 BseDOS, // for NLS/case mapping
159 SysUtils,
160 ACLStringUtility;
161
162const
163 TagStr: array[ ttInvalid .. ttEnd ] of string =
164 (
165 '', //
166 'b',
167 '/b',
168 'i',
169 '/i',
170 'u',
171 '/u',
172 'tt',
173 '/tt',
174 'h1',
175 'h2',
176 'h3',
177 '/h',
178 'color',
179 '/color',
180 'backcolor',
181 '/backcolor',
182 'red',
183 'blue',
184 'green',
185 'black',
186 'wrap',
187 'align',
188 'link',
189 '/link',
190 'leftmargin',
191 'rightmargin',
192 'image',
193 'font',
194 '/font',
195 ''
196 );
197
198 StandardColors: array[ 0..7 ] of TStandardColor =
199 (
200 ( Name : 'white' ; Color: clWhite ),
201 ( Name : 'black' ; Color: clBlack ),
202 ( Name : 'red' ; Color: clRed ),
203 ( Name : 'blue' ; Color: clBlue ),
204 ( Name : 'green' ; Color: clLime ),
205 ( Name : 'purple'; Color: clFuchsia ),
206 ( Name : 'yellow'; Color: clYellow ),
207 ( Name : 'cyan' ; Color: clAqua )
208 );
209
210Procedure ParseTag( const Text: string;
211 Var Tag: TTag );
212var
213 TagType: TTagType;
214 TagTypeText: string;
215 SpacePos: longint;
216begin
217 SpacePos := Pos( ' ', Text );
218 if SpacePos <> 0 then
219 begin
220 Tag.Arguments := trim( Copy( Text, SpacePos + 1, 255 ) );
221 TagTypeText := LowerCase( Copy( Text, 1, SpacePos - 1 ) );
222 end
223 else
224 begin
225 Tag.Arguments := ''; // to save time copying when not needed
226 TagTypeText := LowerCase( Text );
227 end;
228
229 for TagType := ttBold to ttEnd do
230 begin
231 if TagStr[ TagType ] = TagTypeText then
232 begin
233 Tag.TagType := TagType;
234 exit;
235 end;
236 end;
237
238 // not found
239 Tag.TagType := ttInvalid;
240end;
241
242var
243 TagText: string;
244 TagArgText: string;
245
246Function ExtractTag( Var TextPointer: PChar ): TTag;
247var
248 CurrentChar: Char;
249 TagTooLong: boolean;
250 InQuote: boolean;
251begin
252// assert( TextPointer[ 0 ] = '<' );
253 TagText := '';
254 TagTooLong := false;
255 InQuote := false;
256
257 repeat
258 CurrentChar := TextPointer^;
259
260 if ( CurrentChar = '>' )
261 and ( not InQuote ) then
262 begin
263 // found tag end.
264 if TagTooLong then
265 Result.TagType := ttInvalid
266 else
267 ParseTag( TagText, Result );
268 inc( TextPointer );
269 exit;
270 end;
271
272 if CurrentChar = #0 then
273 begin
274 // if we reach here we have reached the end of text
275 // during a tag. invalid tag.
276 Result.TagType := ttInvalid;
277 exit;
278 end;
279
280 if CurrentChar = DoubleQuote then
281 begin
282 if not InQuote then
283 begin
284 InQuote := true
285 end
286 else
287 begin
288 // Could be escaped quote ""
289 if ( TextPointer + 1 ) ^ = DoubleQuote then
290 begin
291 // yes it is
292 inc( TextPointer ); // skip second one
293 end
294 else
295 begin
296 // no, not an escaped quote
297 InQuote := false;
298 end;
299 end;
300
301 end;
302
303 if not TagTooLong then
304 if Length( TagText ) < 200 then
305 TagText := TagText + CurrentChar
306 else
307 TagTooLong := true; // but keep going until the end
308
309 inc( TextPointer );
310 until false;
311
312end;
313
314// Expects textpointer is currently pointing at the >
315Function ExtractPreviousTag( const TextStart: PChar;
316 Var TextPointer: PChar ): TTag;
317var
318 CurrentChar: Char;
319 TagTooLong: boolean;
320 InQuote: boolean;
321begin
322 TagText := '';
323 TagTooLong := false;
324 InQuote := false;
325
326 repeat
327 dec( TextPointer );
328 if TextPointer < TextStart then
329 begin
330 // if we reach here we have reached the end of text
331 // during a tag. invalid tag.
332 Result.TagType := ttInvalid;
333 exit;
334 end;
335 CurrentChar := TextPointer^;
336
337 if ( CurrentChar = '<' )
338 and ( not InQuote ) then
339 begin
340 // found tag end.
341 if TagTooLong then
342 Result.TagType := ttInvalid
343 else
344 ParseTag( TagText, Result );
345 exit;
346 end;
347
348 if CurrentChar = DoubleQuote then
349 begin
350 if not InQuote then
351 begin
352 InQuote := true
353 end
354 else
355 begin
356 // Could be escaped quote ""
357 if TextPointer <= TextStart then
358 begin
359 // start of text... somethin weird
360 InQuote := false;
361 end
362 else if ( TextPointer - 1 ) ^ = DoubleQuote then
363 begin
364 // yes it is
365 dec( TextPointer ); // skip second one
366 end
367 else
368 begin
369 // no, not an escaped quote
370 InQuote := false;
371 end;
372 end;
373
374 end;
375
376 if not TagTooLong then
377 if Length( TagText ) < 200 then
378 TagText := CurrentChar + TagText
379 else
380 TagTooLong := true; // but keep going until the end
381
382 until false;
383
384end;
385
386function ExtractNextTextElement( TextPointer: PChar;
387 Var NextElement: PChar ): TTextElement;
388var
389 TheChar: Char;
390 NextChar: Char;
391begin
392 with Result do
393 begin
394 TheChar := TextPointer^;
395 Character := TheChar;
396 inc( TextPointer );
397
398
399 case TheChar of
400 ' ': // ---- Space (word break) found ----
401 ElementType := teWordBreak;
402
403 #10, #13: // ---- End of line found ----
404 begin
405 ElementType := teLineBreak;
406 if TheChar = #13 then
407 begin
408 TheChar := TextPointer^;
409 if TheChar = #10 then
410 // skip CR following LF
411 inc( TextPointer );
412 end;
413 end;
414
415 #0: // ---- end of text found ----
416 ElementType := teTextEnd;
417
418 '<': // ---- tag found? ----
419 begin
420 NextChar := TextPointer^;
421 if NextChar = '<' then
422 begin
423 // no. just a literal <
424 ElementType := teText;
425 inc( TextPointer );
426 end
427 else
428 begin
429 Tag := ExtractTag( TextPointer );
430 if Tag.TagType = ttImage then
431 ElementType := teImage
432 else
433 ElementType := teStyle;
434 end;
435
436 end;
437
438 '>': // check - should be double
439 begin
440 ElementType := teText;
441 NextChar := TextPointer^;
442 if NextChar = '>' then
443 inc( TextPointer );
444 end;
445
446// '-': // ---- Hyphen (ALT)
447// ElementType := teWrapChar;
448
449 else
450 ElementType := teText;
451 end;
452
453 end; // with
454
455 NextElement := TextPointer;
456end;
457
458function ExtractPreviousTextElement( const TextStart: PChar;
459 TextPointer: PChar;
460 Var NextElement: PChar ): TTextElement;
461var
462 TheChar: Char;
463 PreviousChar: Char;
464 FoundTag: boolean;
465begin
466 with Result do
467 begin
468 dec( TextPointer );
469 TheChar := TextPointer^;
470 Character := TheChar;
471 if TextPointer < TextStart then
472 begin
473 ElementType := teTextEnd;
474 exit;
475 end;
476
477 case TheChar of
478 ' ': // ---- Space (word break) found ----
479 ElementType := teWordBreak;
480
481 #10, #13: // ---- End of line found ----
482 begin
483 ElementType := teLineBreak;
484 if TheChar = #10 then
485 begin
486 dec( TextPointer );
487 TheChar := TextPointer^;
488 if TheChar = #13 then
489 begin
490 // skip CR preceeding LF
491 end
492 else
493 inc( TextPointer );
494 end;
495 end;
496
497 '>': // ---- tag found ----
498 begin
499 FoundTag := true;
500 if TextPointer > TextStart then
501 begin
502 PreviousChar := ( TextPointer - 1 )^;
503 if PreviousChar = '>' then
504 begin
505 // no. just a literal >
506 FoundTag := false;
507 ElementType := teText;
508 dec( TextPointer );
509 end
510 end;
511
512 if FoundTag then
513 begin
514 Tag := ExtractPreviousTag( TextStart, TextPointer );
515 if Tag.TagType = ttImage then
516 ElementType := teImage
517 else
518 ElementType := teStyle;
519 end;
520 end;
521
522 '<': // should be double
523 begin
524 ElementType := teText;
525 if TextPointer > TextStart then
526 begin
527 PreviousChar := TextPointer^;
528 if PreviousChar = '<' then
529 dec( TextPointer );
530 end;
531 end
532
533// '-': // ---- Hyphen (ALT)
534// ElementType := teWrapChar;
535
536 else
537 ElementType := teText;
538 end;
539 end; // with
540 NextElement := TextPointer;
541end;
542
543function GetTagColor( const ColorParam: string;
544 var Color: TColor ): boolean;
545var
546 ColorIndex: longint;
547begin
548 Result := false;
549 if ColorParam <> '' then
550 begin
551 if ColorParam[ 1 ] = '#' then
552 begin
553 try
554 Color := HexToInt( StrRightFrom( ColorParam, 2 ) );
555 Result := true;
556 except
557 end;
558 end
559 else
560 begin
561 for ColorIndex := 0 to High( StandardColors ) do
562 begin
563 if StringsSame( ColorParam, StandardColors[ ColorIndex ].Name ) then
564 begin
565 Color := StandardColors[ ColorIndex ].Color;
566 Result := true;
567 break;
568 end;
569 end;
570 end;
571 end;
572end;
573
574function GetTagTextAlignment( const AlignParam: string;
575 const Default: TTextAlignment ): TTextAlignment;
576begin
577 if StringsSame( AlignParam, 'left' ) then
578 Result := taLeft
579 else if StringsSame( AlignParam, 'center' ) then
580 Result := taCenter
581 else if StringsSame( AlignParam, 'right' ) then
582 Result := taRight
583 else
584 Result := Default;
585end;
586
587function GetTagTextWrap( const WrapParam: string ): boolean;
588begin
589 Result := StringsSame( WrapParam, 'yes' );
590end;
591
592function RichTextFindString( pRichText: PChar;
593 const Text: string;
594 var pMatch: PChar;
595 var MatchLength: longint ): boolean;
596var
597 P: PChar;
598 NextP: PChar;
599 Element: TTextElement;
600 pMatchStart: pchar;
601 pMatchStartNext: pchar;
602 MatchIndex: longint;
603
604 CountryData: COUNTRYCODE;
605 CaseMap: array[ Low( Char )..High( Char ) ] of char;
606 C: Char;
607begin
608 if Length( Text ) = 0 then
609 begin
610 // null string always matches
611 Result := true;
612 pMatch := pRichText;
613 MatchLength := 0;
614 exit;
615 end;
616
617 P := pRichText;
618
619 MatchIndex := 1;
620
621 // Get case mapping of all chars (only SBCS)
622
623 CountryData.Country := 0; // default country
624 CountryData.CodePage := 0; // default codepage
625
626 // fill array with all chars
627 for C := Low( CaseMap ) to High( CaseMap ) do
628 CaseMap[ C ] := C;
629
630 DosMapCase( sizeof( CaseMap ),
631 CountryData,
632 CaseMap );
633
634 // Now search, case insensitively
635
636 while true do
637 begin
638 Element := ExtractNextTextElement( P, NextP );
639
640 case Element.ElementType of
641 teTextEnd:
642 // end of text
643 break;
644
645 teImage,
646 teLineBreak:
647 // breaks a potential match
648 MatchIndex := 1;
649
650 teStyle:
651 ; // ignore, matches can continue
652
653 else
654 begin
655 if CaseMap[ Element.Character ]
656 = CaseMap[ Text[ MatchIndex ] ] then
657 begin
658 // found a match
659 if MatchIndex = 1 then
660 begin
661 pMatchStart := P; // store start of match
662 pMatchStartNext := NextP;
663 end;
664
665 inc( MatchIndex );
666 if MatchIndex > Length( Text ) then
667 begin
668 // found a complete match
669 Result := true;
670 pMatch := pMatchStart;
671 MatchLength := PCharDiff( P, pMatchStart )
672 + 1; // include this char
673 exit;
674 end;
675 end
676 else
677 begin
678 // not a match
679 if MatchIndex > 1 then
680 begin
681 // go back to start of match, + 1
682 NextP := pMatchStartNext;
683 MatchIndex := 1;
684 end;
685 end;
686 end;
687 end;
688
689 P := NextP;
690 end;
691
692 // no match found
693 Result := false;
694 pMatch := nil;
695 MatchLength := 0;
696end;
697
698function RichTextWordLeft( pRichText: PChar;
699 pStart: PChar ): PChar;
700Var
701 P: PChar;
702 NextP: PChar;
703 Element: TTextElement;
704begin
705 P := pStart;
706
707 // skip whitespace/tags...
708 Element := ExtractPreviousTextElement( pRichText, P, NextP );
709 P := NextP;
710 while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
711 begin
712 Element := ExtractPreviousTextElement( pRichText, P, NextP );
713 P := NextP;
714 end;
715 if Element.ElementType = teTextEnd then
716 begin
717 Result := P;
718 // out of text
719 exit;
720 end;
721
722 // back to start of word, skip text/tags
723 while true do
724 begin
725 Element := ExtractPreviousTextElement( pRichText, P, NextP );
726 if not ( Element.ElementType in [ teText, teStyle ] ) then
727 break;
728 P := NextP;
729 end;
730 Result := P;
731end;
732
733function RichTextWordRight( pStart: PChar ): PChar;
734Var
735 P: PChar;
736 NextP: PChar;
737 Element: TTextElement;
738begin
739 P := pStart;
740
741 // skip text/tags...
742 Element := ExtractNextTextElement( P, NextP );
743 while Element.ElementType in [ teStyle, teText ] do
744 begin
745 P := NextP;
746 Element := ExtractNextTextElement( P, NextP );
747 end;
748 if Element.ElementType <> teTextEnd then
749 begin
750 // skip whitespace
751 Element := ExtractNextTextElement( P, NextP );
752 while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
753 begin
754 P := NextP;
755 Element := ExtractNextTextElement( P, NextP );
756 end;
757 end;
758
759 Result := P;
760end;
761
762function RichTextWordAt( pRichText: PChar;
763 pStart: PChar;
764 Var pWordStart: PChar;
765 Var WordLength: longint ): boolean;
766Var
767 P: PChar;
768 NextP: PChar;
769 Element: TTextElement;
770 pWordEnd: PChar;
771begin
772 P := pStart;
773 Element := ExtractNextTextElement( P, NextP );
774 if not ( Element.ElementType in [ teStyle, teText ] ) then
775 begin
776 // not in a word.
777 result := false;
778 pWordStart := nil;
779 WordLength := 0;
780 exit;
781 end;
782 // find end of the word
783 while Element.ElementType in [ teStyle, teText ] do
784 begin
785 P := NextP;
786 Element := ExtractNextTextElement( P, NextP );
787 end;
788 pWordEnd := P;
789
790 P := pStart;
791 Element := ExtractPreviousTextElement( pRichText, P, NextP );
792 while Element.ElementType in [ teStyle, teText ] do
793 begin
794 P := NextP;
795 Element := ExtractPreviousTextElement( pRichText, P, NextP );
796 end;
797 pWordStart := P;
798 WordLength := PCharDiff( pWordEnd, pWordStart );
799 Result := true;
800end;
801
802function CopyPlainTextToBuffer( StartP: PChar;
803 EndP: PChar;
804 Buffer: PChar;
805 BufferLength: longint ): longint;
806var
807 Q: PChar;
808 EndQ: Pchar;
809 P: PChar;
810 NextP: PChar;
811 Element: TTextElement;
812begin
813 P := StartP;
814 Q := Buffer;
815 EndQ := Buffer + BufferLength;
816
817 while P < EndP do
818 begin
819 Element := ExtractNextTextElement( P, NextP );
820 case Element.ElementType of
821 teText, teWordBreak:
822 begin
823 // copy char
824 if Buffer <> nil then
825 Q[ 0 ] := Element.Character;
826 inc( Q );
827 end;
828
829 teLineBreak:
830 begin
831 if Buffer <> nil then
832 Q[ 0 ] := #13;
833 inc( Q );
834 if Q = EndQ then
835 // end of buffer
836 break;
837
838 if Buffer <> nil then
839 Q[ 0 ] := #10;
840 inc( Q );
841 end;
842 end;
843
844 if Q = EndQ then
845 // end of buffer
846 break;
847
848 P := NextP;
849 end;
850 result := PCharDiff( Q, Buffer );
851end;
852
853// ALT begins
854//
855// Check for special text element types that depend on context.
856//
857procedure CheckSpecialElementType( const Character: Char;
858 var ElementType: TTextElementType;
859 var InsideDBC: Boolean;
860 const Codepage: LongInt );
861var
862 CharByte: Byte;
863begin
864 if Codepage in [ 874, 932, 936, 942, 943, 949, 950, 1381, 1386 ] then
865 begin
866 CharByte := ord( Character );
867 if InsideDBC then
868 begin
869 InsideDBC := false;
870 // sanity check for corrupt text sequence (definitely not foolproof)
871 if IsDBCSSecondByte( CharByte, Codepage ) then
872 ElementType := teSecondByte
873 else
874 ElementType := teText;
875 end
876 else
877 begin
878 if IsAsianWrapChar( CharByte, Codepage ) then
879 begin
880 ElementType := teWrapChar;
881 InsideDBC := false;
882 end
883 else if IsDBCSLeadByte( CharByte, Codepage ) then
884 begin
885 ElementType := teLeadByte;
886 InsideDBC := true;
887 end;
888 end;
889 end;
890end;
891
892// Check if this (single-byte) character is a legal wrap point under certain
893// Asian codepages. This is really only used for Thai and for Japanese
894// half-width katakana; other DBCS languages use double-byte characters for all
895// their native glyphs.
896//
897function IsAsianWrapChar( const CharByte: Byte;
898 const Codepage: LongInt ): boolean;
899begin
900 Result := false;
901
902 if ( CharByte < $80) then
903 exit;
904
905 case Codepage of
906 932, 942, 943: // Japanese
907 if CharByte in [ $A2, $A6, $B1..$DD ] then
908 Result := true;
909 874: // Thai
910 Result := true;
911 end;
912end;
913
914// Check if this is the lead byte of a double-byte character. This is essential
915// to know in certain cases:
916// - Nothing must ever be inserted between such a byte and the next byte
917// (e.g. line break, tag, etc).
918// - Cursor position must never be set between such a byte and the next byte.
919// - Selection state must never change between such a byte and the next byte.
920//
921function IsDBCSLeadByte( const CharByte: Byte;
922 const Codepage: LongInt ): boolean;
923begin
924 Result := false;
925
926 case Codepage of
927 932, 942, 943: // Japanese
928 if CharByte in [ $81..$9F, $E0..$FC ] then
929 Result := true;
930 949: // Korean KSC
931 if CharByte in [ $85..$FE ] then
932 Result := true;
933 1381: // Chinese GB2312
934 if CharByte in [ $8C..$FE ] then
935 Result := true;
936 936, 950, 1386: // Chinese BIG-5 or GBK
937 if CharByte in [ $81..$FE ] then
938 Result := true;
939 end;
940end;
941
942// Check to see if this byte is a valid second byte in a double-byte character.
943// (This doesn't guarantee that it IS such a byte, only that it COULD be. The
944// caller is assumed to know whether we're in a double byte character or not.)
945//
946function IsDBCSSecondByte( const CharByte: Byte;
947 const Codepage: LongInt ): boolean;
948begin
949 Result := false;
950
951 case Codepage of
952 932, 936, 942, 943, 949, 950, 1386:
953 if CharByte >= $40 then
954 Result := true;
955 1381:
956 if CharByte >= $A1 then
957 Result := true;
958 end;
959end;
960
961// Given a string position, check to see if it's in the middle of a double-byte
962// character; if so, move back by one position so that we're sitting immediately
963// in front of the double-byte character instead.
964//
965procedure MoveToCharacterBoundary( TextPointer: PChar;
966 var Index: LongInt;
967 var Offset: LongInt;
968 RowStart: LongInt;
969 Codepage: LongInt );
970var
971 P: PChar;
972 NextP: PChar;
973 Element: TTextElement;
974 InsideDBC: boolean;
975begin
976 if ( Offset > 0 ) and
977 ( Codepage in [ 932, 936, 942, 943, 949, 950, 1381, 1386 ]) then
978 begin
979 P := TextPointer + RowStart;
980 InsideDBC := false;
981
982 // Because parsing of byte types is state based, we must verify every
983 // byte's type from the beginning of the line until we reach the target.
984 while RowStart < Index do
985 begin
986 Element := ExtractNextTextElement( P, NextP );
987 CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );
988 P := NextP;
989 inc( RowStart );
990 end;
991{
992 // We've reached the target position, and the current parsing state should
993 // be correctly set. So now we can safely determine the target byte's type.
994 Element := ExtractNextTextElement( P, NextP );
995 CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );
996}
997 if InsideDBC then
998 begin
999 // If this the first byte of a double byte character, move back by one.
1000 dec( Index );
1001 dec( Offset );
1002 end;
1003 end;
1004
1005end;
1006//
1007// ALT ends
1008
1009Initialization
1010End.
Note: See TracBrowser for help on using the repository browser.