Context Navigation

source: trunk/Components/RichTextDocumentUnit.pas@ 418

Last change on this file since 418 was 418, checked in by ataylor, 6 years ago
Experimental new logic to try and fix DBCS text wrapping.
Property svn:eol-style set to `native`
File size: 25.4 KB

Line
1	Unit RichTextDocumentUnit;
2
3	// Declarations of tags, and parsing functions
4
5	Interface
6
7	uses
8	Classes;
9
10	type
11	TTagType = ( ttInvalid,
12	ttBold, ttBoldOff,
13	ttItalic, ttItalicOff,
14	ttUnderline, ttUnderlineOff,
15	ttFixedWidthOn, ttFixedWidthOff,
16	ttHeading1, ttHeading2, ttHeading3, ttHeadingOff,
17	ttColor, ttColorOff,
18	ttBackgroundColor, ttBackgroundColorOff,
19	ttRed, ttBlue, ttGreen, ttBlack,
20	ttWrap,
21	ttAlign,
22	ttBeginLink, ttEndLink,
23	ttSetLeftMargin, ttSetRightMargin,
24	ttImage,
25	ttFont, ttFontOff,
26	ttEnd );
27
28	TStandardColor = record
29	Name: string[ 32 ];
30	Color: TColor;
31	end;
32
33	TTag = record
34	TagType: TTagType;
35	Arguments: string;
36	end;
37
38	TTextElementType = ( teText, // a character
39	teWordBreak,
40	teLineBreak, // end of para
41	teTextEnd,
42	teImage,
43	teStyle,
44	teWrapChar, // A non-whitespace wrappable character (ALT)
45	teLeadByte, // DBCS lead byte (ALT)
46	teSecondByte ); // DBCS secondary byte (ALT)
47
48	TTextElement = record
49	ElementType: TTextElementType;
50	Character: Char;
51	Tag: TTag;
52	end;
53
54	TTextAlignment = ( taLeft,
55	taRight,
56	taCenter );
57
58	// Returns tag pointed to by TextPointer and
59	// moves TextPointer to the first char after the tag.
60	Function ExtractTag( Var TextPointer: PChar ): TTag;
61
62	// Returns tag ending at TextPointer
63	// (Expects textpointer is currently pointing at the >)
64	// and moves TextPointer to the first char of the tag
65	Function ExtractPreviousTag( const TextStart: PChar;
66	Var TextPointer: PChar ): TTag;
67
68	function ExtractNextTextElement( TextPointer: PChar;
69	Var NextElement: PChar ): TTextElement;
70
71	function ExtractPreviousTextElement( const TextStart: PChar;
72	TextPointer: PChar;
73	Var NextElement: PChar ): TTextElement;
74
75	// Parse a color name or value (#hexval). Returns true if valid
76	function GetTagColor( const ColorParam: string;
77	var Color: TColor ): boolean;
78
79	function GetTagTextAlignment( const AlignParam: string;
80	const Default: TTextAlignment ): TTextAlignment;
81
82	function GetTagTextWrap( const WrapParam: string ): boolean;
83
84	// Search within a rich text document for the given text
85	// if found, returns true, pMatch is set to the first match,
86	// and MatchLength returns the length of the match
87	// (which may be greater than the length of Text due to
88	// to skipping tags)
89	// if not found, returns false, pMatch is set to nil
90	function RichTextFindString( pRichText: PChar;
91	const Text: string;
92	var pMatch: PChar;
93	var MatchLength: longint ): boolean;
94
95	// Returns the start of the previous word,
96	// or the current word if pStart is in the middle of the word
97	function RichTextWordLeft( pRichText: PChar;
98	pStart: PChar ): PChar;
99
100	// Returns the start of the next word.
101	function RichTextWordRight( pStart: PChar ): PChar;
102
103	// If pStart is in the middle of a word, then
104	// returns true and sets the start and length of the word
105	function RichTextWordAt( pRichText: PChar;
106	pStart: PChar;
107	Var pWordStart: PChar;
108	Var WordLength: longint ): boolean;
109
110	// Copies plaintext of richtext starting at StartP
111	// to the given buffer. Returns number of characters copied.
112	// Buffer may be nil
113	// If BufferLength is negative, it is effectively ignored
114	function CopyPlainTextToBuffer( StartP: PChar;
115	EndP: PChar;
116	Buffer: PChar;
117	BufferLength: longint ): longint;
118
119
120	// ALT begins
121	//
122
123	// Check for special text element types and adjust value accordingly.
124	procedure CheckSpecialElementType( const Character: Char;
125	var ElementType: TTextElementType;
126	var InsideDBC: Boolean;
127	const Codepage: LongInt );
128
129	// Returns true if the given byte value is a legally-wrappable single-byte
130	// character under the given Asian codepage.
131	function IsAsianWrapChar( const CharByte: Byte;
132	const Codepage: LongInt ): boolean;
133
134	// Returns true if the given byte value is the leading byte of a multi-byte
135	// character under the given Asian codepage.
136	function IsDBCSLeadByte( const CharByte: Byte;
137	const Codepage: LongInt ): boolean;
138
139	// Returns true if the given byte value is valid as a possible second byte of
140	// a multi-byte character (this does not guarantee that it IS one, just that
141	// it COULD be).
142	function IsDBCSSecondByte( const CharByte: Byte;
143	const Codepage: LongInt ): boolean;
144
145	// Adjusts the character position to the beginning of any multi-byte character.
146	procedure MoveToCharacterBoundary( TextPointer: PChar;
147	var Index: LongInt;
148	var Offset: LongInt;
149	RowStart: LongInt;
150	Codepage: LongInt );
151	//
152	// ALT ends
153
154
155	Implementation
156
157	uses
158	BseDOS, // for NLS/case mapping
159	SysUtils,
160	ACLStringUtility;
161
162	const
163	TagStr: array[ ttInvalid .. ttEnd ] of string =
164	(
165	'', //
166	'b',
167	'/b',
168	'i',
169	'/i',
170	'u',
171	'/u',
172	'tt',
173	'/tt',
174	'h1',
175	'h2',
176	'h3',
177	'/h',
178	'color',
179	'/color',
180	'backcolor',
181	'/backcolor',
182	'red',
183	'blue',
184	'green',
185	'black',
186	'wrap',
187	'align',
188	'link',
189	'/link',
190	'leftmargin',
191	'rightmargin',
192	'image',
193	'font',
194	'/font',
195	''
196	);
197
198	StandardColors: array[ 0..7 ] of TStandardColor =
199	(
200	( Name : 'white' ; Color: clWhite ),
201	( Name : 'black' ; Color: clBlack ),
202	( Name : 'red' ; Color: clRed ),
203	( Name : 'blue' ; Color: clBlue ),
204	( Name : 'green' ; Color: clLime ),
205	( Name : 'purple'; Color: clFuchsia ),
206	( Name : 'yellow'; Color: clYellow ),
207	( Name : 'cyan' ; Color: clAqua )
208	);
209
210	Procedure ParseTag( const Text: string;
211	Var Tag: TTag );
212	var
213	TagType: TTagType;
214	TagTypeText: string;
215	SpacePos: longint;
216	begin
217	SpacePos := Pos( ' ', Text );
218	if SpacePos <> 0 then
219	begin
220	Tag.Arguments := trim( Copy( Text, SpacePos + 1, 255 ) );
221	TagTypeText := LowerCase( Copy( Text, 1, SpacePos - 1 ) );
222	end
223	else
224	begin
225	Tag.Arguments := ''; // to save time copying when not needed
226	TagTypeText := LowerCase( Text );
227	end;
228
229	for TagType := ttBold to ttEnd do
230	begin
231	if TagStr[ TagType ] = TagTypeText then
232	begin
233	Tag.TagType := TagType;
234	exit;
235	end;
236	end;
237
238	// not found
239	Tag.TagType := ttInvalid;
240	end;
241
242	var
243	TagText: string;
244	TagArgText: string;
245
246	Function ExtractTag( Var TextPointer: PChar ): TTag;
247	var
248	CurrentChar: Char;
249	TagTooLong: boolean;
250	InQuote: boolean;
251	begin
252	// assert( TextPointer[ 0 ] = '<' );
253	TagText := '';
254	TagTooLong := false;
255	InQuote := false;
256
257	repeat
258	CurrentChar := TextPointer^;
259
260	if ( CurrentChar = '>' )
261	and ( not InQuote ) then
262	begin
263	// found tag end.
264	if TagTooLong then
265	Result.TagType := ttInvalid
266	else
267	ParseTag( TagText, Result );
268	inc( TextPointer );
269	exit;
270	end;
271
272	if CurrentChar = #0 then
273	begin
274	// if we reach here we have reached the end of text
275	// during a tag. invalid tag.
276	Result.TagType := ttInvalid;
277	exit;
278	end;
279
280	if CurrentChar = DoubleQuote then
281	begin
282	if not InQuote then
283	begin
284	InQuote := true
285	end
286	else
287	begin
288	// Could be escaped quote ""
289	if ( TextPointer + 1 ) ^ = DoubleQuote then
290	begin
291	// yes it is
292	inc( TextPointer ); // skip second one
293	end
294	else
295	begin
296	// no, not an escaped quote
297	InQuote := false;
298	end;
299	end;
300
301	end;
302
303	if not TagTooLong then
304	if Length( TagText ) < 200 then
305	TagText := TagText + CurrentChar
306	else
307	TagTooLong := true; // but keep going until the end
308
309	inc( TextPointer );
310	until false;
311
312	end;
313
314	// Expects textpointer is currently pointing at the >
315	Function ExtractPreviousTag( const TextStart: PChar;
316	Var TextPointer: PChar ): TTag;
317	var
318	CurrentChar: Char;
319	TagTooLong: boolean;
320	InQuote: boolean;
321	begin
322	TagText := '';
323	TagTooLong := false;
324	InQuote := false;
325
326	repeat
327	dec( TextPointer );
328	if TextPointer < TextStart then
329	begin
330	// if we reach here we have reached the end of text
331	// during a tag. invalid tag.
332	Result.TagType := ttInvalid;
333	exit;
334	end;
335	CurrentChar := TextPointer^;
336
337	if ( CurrentChar = '<' )
338	and ( not InQuote ) then
339	begin
340	// found tag end.
341	if TagTooLong then
342	Result.TagType := ttInvalid
343	else
344	ParseTag( TagText, Result );
345	exit;
346	end;
347
348	if CurrentChar = DoubleQuote then
349	begin
350	if not InQuote then
351	begin
352	InQuote := true
353	end
354	else
355	begin
356	// Could be escaped quote ""
357	if TextPointer <= TextStart then
358	begin
359	// start of text... somethin weird
360	InQuote := false;
361	end
362	else if ( TextPointer - 1 ) ^ = DoubleQuote then
363	begin
364	// yes it is
365	dec( TextPointer ); // skip second one
366	end
367	else
368	begin
369	// no, not an escaped quote
370	InQuote := false;
371	end;
372	end;
373
374	end;
375
376	if not TagTooLong then
377	if Length( TagText ) < 200 then
378	TagText := CurrentChar + TagText
379	else
380	TagTooLong := true; // but keep going until the end
381
382	until false;
383
384	end;
385
386	function ExtractNextTextElement( TextPointer: PChar;
387	Var NextElement: PChar ): TTextElement;
388	var
389	TheChar: Char;
390	NextChar: Char;
391	begin
392	with Result do
393	begin
394	TheChar := TextPointer^;
395	Character := TheChar;
396	inc( TextPointer );
397
398
399	case TheChar of
400	' ': // ---- Space (word break) found ----
401	ElementType := teWordBreak;
402
403	#10, #13: // ---- End of line found ----
404	begin
405	ElementType := teLineBreak;
406	if TheChar = #13 then
407	begin
408	TheChar := TextPointer^;
409	if TheChar = #10 then
410	// skip CR following LF
411	inc( TextPointer );
412	end;
413	end;
414
415	#0: // ---- end of text found ----
416	ElementType := teTextEnd;
417
418	'<': // ---- tag found? ----
419	begin
420	NextChar := TextPointer^;
421	if NextChar = '<' then
422	begin
423	// no. just a literal <
424	ElementType := teText;
425	inc( TextPointer );
426	end
427	else
428	begin
429	Tag := ExtractTag( TextPointer );
430	if Tag.TagType = ttImage then
431	ElementType := teImage
432	else
433	ElementType := teStyle;
434	end;
435
436	end;
437
438	'>': // check - should be double
439	begin
440	ElementType := teText;
441	NextChar := TextPointer^;
442	if NextChar = '>' then
443	inc( TextPointer );
444	end;
445
446	// '-': // ---- Hyphen (ALT)
447	// ElementType := teWrapChar;
448
449	else
450	ElementType := teText;
451	end;
452
453	end; // with
454
455	NextElement := TextPointer;
456	end;
457
458	function ExtractPreviousTextElement( const TextStart: PChar;
459	TextPointer: PChar;
460	Var NextElement: PChar ): TTextElement;
461	var
462	TheChar: Char;
463	PreviousChar: Char;
464	FoundTag: boolean;
465	begin
466	with Result do
467	begin
468	dec( TextPointer );
469	TheChar := TextPointer^;
470	Character := TheChar;
471	if TextPointer < TextStart then
472	begin
473	ElementType := teTextEnd;
474	exit;
475	end;
476
477	case TheChar of
478	' ': // ---- Space (word break) found ----
479	ElementType := teWordBreak;
480
481	#10, #13: // ---- End of line found ----
482	begin
483	ElementType := teLineBreak;
484	if TheChar = #10 then
485	begin
486	dec( TextPointer );
487	TheChar := TextPointer^;
488	if TheChar = #13 then
489	begin
490	// skip CR preceeding LF
491	end
492	else
493	inc( TextPointer );
494	end;
495	end;
496
497	'>': // ---- tag found ----
498	begin
499	FoundTag := true;
500	if TextPointer > TextStart then
501	begin
502	PreviousChar := ( TextPointer - 1 )^;
503	if PreviousChar = '>' then
504	begin
505	// no. just a literal >
506	FoundTag := false;
507	ElementType := teText;
508	dec( TextPointer );
509	end
510	end;
511
512	if FoundTag then
513	begin
514	Tag := ExtractPreviousTag( TextStart, TextPointer );
515	if Tag.TagType = ttImage then
516	ElementType := teImage
517	else
518	ElementType := teStyle;
519	end;
520	end;
521
522	'<': // should be double
523	begin
524	ElementType := teText;
525	if TextPointer > TextStart then
526	begin
527	PreviousChar := TextPointer^;
528	if PreviousChar = '<' then
529	dec( TextPointer );
530	end;
531	end
532
533	// '-': // ---- Hyphen (ALT)
534	// ElementType := teWrapChar;
535
536	else
537	ElementType := teText;
538	end;
539	end; // with
540	NextElement := TextPointer;
541	end;
542
543	function GetTagColor( const ColorParam: string;
544	var Color: TColor ): boolean;
545	var
546	ColorIndex: longint;
547	begin
548	Result := false;
549	if ColorParam <> '' then
550	begin
551	if ColorParam[ 1 ] = '#' then
552	begin
553	try
554	Color := HexToInt( StrRightFrom( ColorParam, 2 ) );
555	Result := true;
556	except
557	end;
558	end
559	else
560	begin
561	for ColorIndex := 0 to High( StandardColors ) do
562	begin
563	if StringsSame( ColorParam, StandardColors[ ColorIndex ].Name ) then
564	begin
565	Color := StandardColors[ ColorIndex ].Color;
566	Result := true;
567	break;
568	end;
569	end;
570	end;
571	end;
572	end;
573
574	function GetTagTextAlignment( const AlignParam: string;
575	const Default: TTextAlignment ): TTextAlignment;
576	begin
577	if StringsSame( AlignParam, 'left' ) then
578	Result := taLeft
579	else if StringsSame( AlignParam, 'center' ) then
580	Result := taCenter
581	else if StringsSame( AlignParam, 'right' ) then
582	Result := taRight
583	else
584	Result := Default;
585	end;
586
587	function GetTagTextWrap( const WrapParam: string ): boolean;
588	begin
589	Result := StringsSame( WrapParam, 'yes' );
590	end;
591
592	function RichTextFindString( pRichText: PChar;
593	const Text: string;
594	var pMatch: PChar;
595	var MatchLength: longint ): boolean;
596	var
597	P: PChar;
598	NextP: PChar;
599	Element: TTextElement;
600	pMatchStart: pchar;
601	pMatchStartNext: pchar;
602	MatchIndex: longint;
603
604	CountryData: COUNTRYCODE;
605	CaseMap: array[ Low( Char )..High( Char ) ] of char;
606	C: Char;
607	begin
608	if Length( Text ) = 0 then
609	begin
610	// null string always matches
611	Result := true;
612	pMatch := pRichText;
613	MatchLength := 0;
614	exit;
615	end;
616
617	P := pRichText;
618
619	MatchIndex := 1;
620
621	// Get case mapping of all chars (only SBCS)
622
623	CountryData.Country := 0; // default country
624	CountryData.CodePage := 0; // default codepage
625
626	// fill array with all chars
627	for C := Low( CaseMap ) to High( CaseMap ) do
628	CaseMap[ C ] := C;
629
630	DosMapCase( sizeof( CaseMap ),
631	CountryData,
632	CaseMap );
633
634	// Now search, case insensitively
635
636	while true do
637	begin
638	Element := ExtractNextTextElement( P, NextP );
639
640	case Element.ElementType of
641	teTextEnd:
642	// end of text
643	break;
644
645	teImage,
646	teLineBreak:
647	// breaks a potential match
648	MatchIndex := 1;
649
650	teStyle:
651	; // ignore, matches can continue
652
653	else
654	begin
655	if CaseMap[ Element.Character ]
656	= CaseMap[ Text[ MatchIndex ] ] then
657	begin
658	// found a match
659	if MatchIndex = 1 then
660	begin
661	pMatchStart := P; // store start of match
662	pMatchStartNext := NextP;
663	end;
664
665	inc( MatchIndex );
666	if MatchIndex > Length( Text ) then
667	begin
668	// found a complete match
669	Result := true;
670	pMatch := pMatchStart;
671	MatchLength := PCharDiff( P, pMatchStart )
672	+ 1; // include this char
673	exit;
674	end;
675	end
676	else
677	begin
678	// not a match
679	if MatchIndex > 1 then
680	begin
681	// go back to start of match, + 1
682	NextP := pMatchStartNext;
683	MatchIndex := 1;
684	end;
685	end;
686	end;
687	end;
688
689	P := NextP;
690	end;
691
692	// no match found
693	Result := false;
694	pMatch := nil;
695	MatchLength := 0;
696	end;
697
698	function RichTextWordLeft( pRichText: PChar;
699	pStart: PChar ): PChar;
700	Var
701	P: PChar;
702	NextP: PChar;
703	Element: TTextElement;
704	begin
705	P := pStart;
706
707	// skip whitespace/tags...
708	Element := ExtractPreviousTextElement( pRichText, P, NextP );
709	P := NextP;
710	while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
711	begin
712	Element := ExtractPreviousTextElement( pRichText, P, NextP );
713	P := NextP;
714	end;
715	if Element.ElementType = teTextEnd then
716	begin
717	Result := P;
718	// out of text
719	exit;
720	end;
721
722	// back to start of word, skip text/tags
723	while true do
724	begin
725	Element := ExtractPreviousTextElement( pRichText, P, NextP );
726	if not ( Element.ElementType in [ teText, teStyle ] ) then
727	break;
728	P := NextP;
729	end;
730	Result := P;
731	end;
732
733	function RichTextWordRight( pStart: PChar ): PChar;
734	Var
735	P: PChar;
736	NextP: PChar;
737	Element: TTextElement;
738	begin
739	P := pStart;
740
741	// skip text/tags...
742	Element := ExtractNextTextElement( P, NextP );
743	while Element.ElementType in [ teStyle, teText ] do
744	begin
745	P := NextP;
746	Element := ExtractNextTextElement( P, NextP );
747	end;
748	if Element.ElementType <> teTextEnd then
749	begin
750	// skip whitespace
751	Element := ExtractNextTextElement( P, NextP );
752	while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
753	begin
754	P := NextP;
755	Element := ExtractNextTextElement( P, NextP );
756	end;
757	end;
758
759	Result := P;
760	end;
761
762	function RichTextWordAt( pRichText: PChar;
763	pStart: PChar;
764	Var pWordStart: PChar;
765	Var WordLength: longint ): boolean;
766	Var
767	P: PChar;
768	NextP: PChar;
769	Element: TTextElement;
770	pWordEnd: PChar;
771	begin
772	P := pStart;
773	Element := ExtractNextTextElement( P, NextP );
774	if not ( Element.ElementType in [ teStyle, teText ] ) then
775	begin
776	// not in a word.
777	result := false;
778	pWordStart := nil;
779	WordLength := 0;
780	exit;
781	end;
782	// find end of the word
783	while Element.ElementType in [ teStyle, teText ] do
784	begin
785	P := NextP;
786	Element := ExtractNextTextElement( P, NextP );
787	end;
788	pWordEnd := P;
789
790	P := pStart;
791	Element := ExtractPreviousTextElement( pRichText, P, NextP );
792	while Element.ElementType in [ teStyle, teText ] do
793	begin
794	P := NextP;
795	Element := ExtractPreviousTextElement( pRichText, P, NextP );
796	end;
797	pWordStart := P;
798	WordLength := PCharDiff( pWordEnd, pWordStart );
799	Result := true;
800	end;
801
802	function CopyPlainTextToBuffer( StartP: PChar;
803	EndP: PChar;
804	Buffer: PChar;
805	BufferLength: longint ): longint;
806	var
807	Q: PChar;
808	EndQ: Pchar;
809	P: PChar;
810	NextP: PChar;
811	Element: TTextElement;
812	begin
813	P := StartP;
814	Q := Buffer;
815	EndQ := Buffer + BufferLength;
816
817	while P < EndP do
818	begin
819	Element := ExtractNextTextElement( P, NextP );
820	case Element.ElementType of
821	teText, teWordBreak:
822	begin
823	// copy char
824	if Buffer <> nil then
825	Q[ 0 ] := Element.Character;
826	inc( Q );
827	end;
828
829	teLineBreak:
830	begin
831	if Buffer <> nil then
832	Q[ 0 ] := #13;
833	inc( Q );
834	if Q = EndQ then
835	// end of buffer
836	break;
837
838	if Buffer <> nil then
839	Q[ 0 ] := #10;
840	inc( Q );
841	end;
842	end;
843
844	if Q = EndQ then
845	// end of buffer
846	break;
847
848	P := NextP;
849	end;
850	result := PCharDiff( Q, Buffer );
851	end;
852
853	// ALT begins
854	//
855	// Check for special text element types that depend on context.
856	//
857	procedure CheckSpecialElementType( const Character: Char;
858	var ElementType: TTextElementType;
859	var InsideDBC: Boolean;
860	const Codepage: LongInt );
861	var
862	CharByte: Byte;
863	begin
864	if Codepage in [ 874, 932, 936, 942, 943, 949, 950, 1381, 1386 ] then
865	begin
866	CharByte := ord( Character );
867	if InsideDBC then
868	begin
869	InsideDBC := false;
870	// sanity check for corrupt text sequence (definitely not foolproof)
871	if IsDBCSSecondByte( CharByte, Codepage ) then
872	ElementType := teSecondByte
873	else
874	ElementType := teText;
875	end
876	else
877	begin
878	if IsAsianWrapChar( CharByte, Codepage ) then
879	begin
880	ElementType := teWrapChar;
881	InsideDBC := false;
882	end
883	else if IsDBCSLeadByte( CharByte, Codepage ) then
884	begin
885	ElementType := teLeadByte;
886	InsideDBC := true;
887	end;
888	end;
889	end;
890	end;
891
892	// Check if this (single-byte) character is a legal wrap point under certain
893	// Asian codepages. This is really only used for Thai and for Japanese
894	// half-width katakana; other DBCS languages use double-byte characters for all
895	// their native glyphs.
896	//
897	function IsAsianWrapChar( const CharByte: Byte;
898	const Codepage: LongInt ): boolean;
899	begin
900	Result := false;
901
902	if ( CharByte < $80) then
903	exit;
904
905	case Codepage of
906	932, 942, 943: // Japanese
907	if CharByte in [ $A2, $A6, $B1..$DD ] then
908	Result := true;
909	874: // Thai
910	Result := true;
911	end;
912	end;
913
914	// Check if this is the lead byte of a double-byte character. This is essential
915	// to know in certain cases:
916	// - Nothing must ever be inserted between such a byte and the next byte
917	// (e.g. line break, tag, etc).
918	// - Cursor position must never be set between such a byte and the next byte.
919	// - Selection state must never change between such a byte and the next byte.
920	//
921	function IsDBCSLeadByte( const CharByte: Byte;
922	const Codepage: LongInt ): boolean;
923	begin
924	Result := false;
925
926	case Codepage of
927	932, 942, 943: // Japanese
928	if CharByte in [ $81..$9F, $E0..$FC ] then
929	Result := true;
930	949: // Korean KSC
931	if CharByte in [ $85..$FE ] then
932	Result := true;
933	1381: // Chinese GB2312
934	if CharByte in [ $8C..$FE ] then
935	Result := true;
936	936, 950, 1386: // Chinese BIG-5 or GBK
937	if CharByte in [ $81..$FE ] then
938	Result := true;
939	end;
940	end;
941
942	// Check to see if this byte is a valid second byte in a double-byte character.
943	// (This doesn't guarantee that it IS such a byte, only that it COULD be. The
944	// caller is assumed to know whether we're in a double byte character or not.)
945	//
946	function IsDBCSSecondByte( const CharByte: Byte;
947	const Codepage: LongInt ): boolean;
948	begin
949	Result := false;
950
951	case Codepage of
952	932, 936, 942, 943, 949, 950, 1386:
953	if CharByte >= $40 then
954	Result := true;
955	1381:
956	if CharByte >= $A1 then
957	Result := true;
958	end;
959	end;
960
961	// Given a string position, check to see if it's in the middle of a double-byte
962	// character; if so, move back by one position so that we're sitting immediately
963	// in front of the double-byte character instead.
964	//
965	procedure MoveToCharacterBoundary( TextPointer: PChar;
966	var Index: LongInt;
967	var Offset: LongInt;
968	RowStart: LongInt;
969	Codepage: LongInt );
970	var
971	P: PChar;
972	NextP: PChar;
973	Element: TTextElement;
974	InsideDBC: boolean;
975	begin
976	if ( Offset > 0 ) and
977	( Codepage in [ 932, 936, 942, 943, 949, 950, 1381, 1386 ]) then
978	begin
979	P := TextPointer + RowStart;
980	InsideDBC := false;
981
982	// Because parsing of byte types is state based, we must verify every
983	// byte's type from the beginning of the line until we reach the target.
984	while RowStart < Index do
985	begin
986	Element := ExtractNextTextElement( P, NextP );
987	CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );
988	P := NextP;
989	inc( RowStart );
990	end;
991	{
992	// We've reached the target position, and the current parsing state should
993	// be correctly set. So now we can safely determine the target byte's type.
994	Element := ExtractNextTextElement( P, NextP );
995	CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );
996	}
997	if InsideDBC then
998	begin
999	// If this the first byte of a double byte character, move back by one.
1000	dec( Index );
1001	dec( Offset );
1002	end;
1003	end;
1004
1005	end;
1006	//
1007	// ALT ends
1008
1009	Initialization
1010	End.

Note: See TracBrowser for help on using the repository browser.

Download in other formats: