Context Navigation

source: trunk/Components/RichTextDocumentUnit.pas@ 420

Last change on this file since 420 was 420, checked in by ataylor, 6 years ago
Improve DBCS string width calculations, other small tweaks.
Property svn:eol-style set to `native`
File size: 25.6 KB

Line
1	Unit RichTextDocumentUnit;
2
3	// Declarations of tags, and parsing functions
4
5	Interface
6
7	uses
8	Classes;
9
10	type
11	TTagType = ( ttInvalid,
12	ttBold, ttBoldOff,
13	ttItalic, ttItalicOff,
14	ttUnderline, ttUnderlineOff,
15	ttFixedWidthOn, ttFixedWidthOff,
16	ttHeading1, ttHeading2, ttHeading3, ttHeadingOff,
17	ttColor, ttColorOff,
18	ttBackgroundColor, ttBackgroundColorOff,
19	ttRed, ttBlue, ttGreen, ttBlack,
20	ttWrap,
21	ttAlign,
22	ttBeginLink, ttEndLink,
23	ttSetLeftMargin, ttSetRightMargin,
24	ttImage,
25	ttFont, ttFontOff,
26	ttEnd );
27
28	TStandardColor = record
29	Name: string[ 32 ];
30	Color: TColor;
31	end;
32
33	TTag = record
34	TagType: TTagType;
35	Arguments: string;
36	end;
37
38	TTextElementType = ( teText, // a character
39	teWordBreak,
40	teLineBreak, // end of para
41	teTextEnd,
42	teImage,
43	teStyle,
44	teWrapChar, // A non-whitespace wrappable character (ALT)
45	teLeadByte, // DBCS lead byte (ALT)
46	teSecondByte ); // DBCS secondary byte (ALT)
47
48	TTextElement = record
49	ElementType: TTextElementType;
50	Character: Char;
51	Tag: TTag;
52	end;
53
54	TTextAlignment = ( taLeft,
55	taRight,
56	taCenter );
57
58	// Returns tag pointed to by TextPointer and
59	// moves TextPointer to the first char after the tag.
60	Function ExtractTag( Var TextPointer: PChar ): TTag;
61
62	// Returns tag ending at TextPointer
63	// (Expects textpointer is currently pointing at the >)
64	// and moves TextPointer to the first char of the tag
65	Function ExtractPreviousTag( const TextStart: PChar;
66	Var TextPointer: PChar ): TTag;
67
68	function ExtractNextTextElement( TextPointer: PChar;
69	Var NextElement: PChar ): TTextElement;
70
71	function ExtractPreviousTextElement( const TextStart: PChar;
72	TextPointer: PChar;
73	Var NextElement: PChar ): TTextElement;
74
75	// Parse a color name or value (#hexval). Returns true if valid
76	function GetTagColor( const ColorParam: string;
77	var Color: TColor ): boolean;
78
79	function GetTagTextAlignment( const AlignParam: string;
80	const Default: TTextAlignment ): TTextAlignment;
81
82	function GetTagTextWrap( const WrapParam: string ): boolean;
83
84	// Search within a rich text document for the given text
85	// if found, returns true, pMatch is set to the first match,
86	// and MatchLength returns the length of the match
87	// (which may be greater than the length of Text due to
88	// to skipping tags)
89	// if not found, returns false, pMatch is set to nil
90	function RichTextFindString( pRichText: PChar;
91	const Text: string;
92	var pMatch: PChar;
93	var MatchLength: longint ): boolean;
94
95	// Returns the start of the previous word,
96	// or the current word if pStart is in the middle of the word
97	function RichTextWordLeft( pRichText: PChar;
98	pStart: PChar ): PChar;
99
100	// Returns the start of the next word.
101	function RichTextWordRight( pStart: PChar ): PChar;
102
103	// If pStart is in the middle of a word, then
104	// returns true and sets the start and length of the word
105	function RichTextWordAt( pRichText: PChar;
106	pStart: PChar;
107	Var pWordStart: PChar;
108	Var WordLength: longint ): boolean;
109
110	// Copies plaintext of richtext starting at StartP
111	// to the given buffer. Returns number of characters copied.
112	// Buffer may be nil
113	// If BufferLength is negative, it is effectively ignored
114	function CopyPlainTextToBuffer( StartP: PChar;
115	EndP: PChar;
116	Buffer: PChar;
117	BufferLength: longint ): longint;
118
119
120	// ALT begins
121	//
122
123	// Check for special text element types and adjust value accordingly.
124	procedure CheckSpecialElementType( const Character: Char;
125	var ElementType: TTextElementType;
126	var InsideDBC: Boolean;
127	const Codepage: LongInt );
128
129	// Returns true if the given byte value is a legally-wrappable single-byte
130	// character under the given Asian codepage.
131	function IsAsianWrapChar( const CharByte: Byte;
132	const Codepage: LongInt ): boolean;
133
134	// Returns true if the given byte value is the leading byte of a multi-byte
135	// character under the given Asian codepage.
136	function IsDBCSLeadByte( const CharByte: Byte;
137	const Codepage: LongInt ): boolean;
138
139	// Returns true if the given byte value is valid as a possible second byte of
140	// a multi-byte character (this does not guarantee that it IS one, just that
141	// it COULD be).
142	function IsDBCSSecondByte( const CharByte: Byte;
143	const Codepage: LongInt ): boolean;
144
145	// Adjusts the character position to the beginning or end of any multi-byte
146	// character.
147	procedure MoveToCharacterBoundary( TextPointer: PChar;
148	var Index: LongInt;
149	var Offset: LongInt;
150	RowStart: LongInt;
151	Codepage: LongInt;
152	Advance: Boolean; );
153	//
154	// ALT ends
155
156
157	Implementation
158
159	uses
160	BseDOS, // for NLS/case mapping
161	SysUtils,
162	ACLStringUtility;
163
164	const
165	TagStr: array[ ttInvalid .. ttEnd ] of string =
166	(
167	'', //
168	'b',
169	'/b',
170	'i',
171	'/i',
172	'u',
173	'/u',
174	'tt',
175	'/tt',
176	'h1',
177	'h2',
178	'h3',
179	'/h',
180	'color',
181	'/color',
182	'backcolor',
183	'/backcolor',
184	'red',
185	'blue',
186	'green',
187	'black',
188	'wrap',
189	'align',
190	'link',
191	'/link',
192	'leftmargin',
193	'rightmargin',
194	'image',
195	'font',
196	'/font',
197	''
198	);
199
200	StandardColors: array[ 0..7 ] of TStandardColor =
201	(
202	( Name : 'white' ; Color: clWhite ),
203	( Name : 'black' ; Color: clBlack ),
204	( Name : 'red' ; Color: clRed ),
205	( Name : 'blue' ; Color: clBlue ),
206	( Name : 'green' ; Color: clLime ),
207	( Name : 'purple'; Color: clFuchsia ),
208	( Name : 'yellow'; Color: clYellow ),
209	( Name : 'cyan' ; Color: clAqua )
210	);
211
212	Procedure ParseTag( const Text: string;
213	Var Tag: TTag );
214	var
215	TagType: TTagType;
216	TagTypeText: string;
217	SpacePos: longint;
218	begin
219	SpacePos := Pos( ' ', Text );
220	if SpacePos <> 0 then
221	begin
222	Tag.Arguments := trim( Copy( Text, SpacePos + 1, 255 ) );
223	TagTypeText := LowerCase( Copy( Text, 1, SpacePos - 1 ) );
224	end
225	else
226	begin
227	Tag.Arguments := ''; // to save time copying when not needed
228	TagTypeText := LowerCase( Text );
229	end;
230
231	for TagType := ttBold to ttEnd do
232	begin
233	if TagStr[ TagType ] = TagTypeText then
234	begin
235	Tag.TagType := TagType;
236	exit;
237	end;
238	end;
239
240	// not found
241	Tag.TagType := ttInvalid;
242	end;
243
244	var
245	TagText: string;
246	TagArgText: string;
247
248	Function ExtractTag( Var TextPointer: PChar ): TTag;
249	var
250	CurrentChar: Char;
251	TagTooLong: boolean;
252	InQuote: boolean;
253	begin
254	// assert( TextPointer[ 0 ] = '<' );
255	TagText := '';
256	TagTooLong := false;
257	InQuote := false;
258
259	repeat
260	CurrentChar := TextPointer^;
261
262	if ( CurrentChar = '>' )
263	and ( not InQuote ) then
264	begin
265	// found tag end.
266	if TagTooLong then
267	Result.TagType := ttInvalid
268	else
269	ParseTag( TagText, Result );
270	inc( TextPointer );
271	exit;
272	end;
273
274	if CurrentChar = #0 then
275	begin
276	// if we reach here we have reached the end of text
277	// during a tag. invalid tag.
278	Result.TagType := ttInvalid;
279	exit;
280	end;
281
282	if CurrentChar = DoubleQuote then
283	begin
284	if not InQuote then
285	begin
286	InQuote := true
287	end
288	else
289	begin
290	// Could be escaped quote ""
291	if ( TextPointer + 1 ) ^ = DoubleQuote then
292	begin
293	// yes it is
294	inc( TextPointer ); // skip second one
295	end
296	else
297	begin
298	// no, not an escaped quote
299	InQuote := false;
300	end;
301	end;
302
303	end;
304
305	if not TagTooLong then
306	if Length( TagText ) < 200 then
307	TagText := TagText + CurrentChar
308	else
309	TagTooLong := true; // but keep going until the end
310
311	inc( TextPointer );
312	until false;
313
314	end;
315
316	// Expects textpointer is currently pointing at the >
317	Function ExtractPreviousTag( const TextStart: PChar;
318	Var TextPointer: PChar ): TTag;
319	var
320	CurrentChar: Char;
321	TagTooLong: boolean;
322	InQuote: boolean;
323	begin
324	TagText := '';
325	TagTooLong := false;
326	InQuote := false;
327
328	repeat
329	dec( TextPointer );
330	if TextPointer < TextStart then
331	begin
332	// if we reach here we have reached the end of text
333	// during a tag. invalid tag.
334	Result.TagType := ttInvalid;
335	exit;
336	end;
337	CurrentChar := TextPointer^;
338
339	if ( CurrentChar = '<' )
340	and ( not InQuote ) then
341	begin
342	// found tag end.
343	if TagTooLong then
344	Result.TagType := ttInvalid
345	else
346	ParseTag( TagText, Result );
347	exit;
348	end;
349
350	if CurrentChar = DoubleQuote then
351	begin
352	if not InQuote then
353	begin
354	InQuote := true
355	end
356	else
357	begin
358	// Could be escaped quote ""
359	if TextPointer <= TextStart then
360	begin
361	// start of text... somethin weird
362	InQuote := false;
363	end
364	else if ( TextPointer - 1 ) ^ = DoubleQuote then
365	begin
366	// yes it is
367	dec( TextPointer ); // skip second one
368	end
369	else
370	begin
371	// no, not an escaped quote
372	InQuote := false;
373	end;
374	end;
375
376	end;
377
378	if not TagTooLong then
379	if Length( TagText ) < 200 then
380	TagText := CurrentChar + TagText
381	else
382	TagTooLong := true; // but keep going until the end
383
384	until false;
385
386	end;
387
388	function ExtractNextTextElement( TextPointer: PChar;
389	Var NextElement: PChar ): TTextElement;
390	var
391	TheChar: Char;
392	NextChar: Char;
393	begin
394	with Result do
395	begin
396	TheChar := TextPointer^;
397	Character := TheChar;
398	inc( TextPointer );
399
400
401	case TheChar of
402	' ': // ---- Space (word break) found ----
403	ElementType := teWordBreak;
404
405	#10, #13: // ---- End of line found ----
406	begin
407	ElementType := teLineBreak;
408	if TheChar = #13 then
409	begin
410	TheChar := TextPointer^;
411	if TheChar = #10 then
412	// skip CR following LF
413	inc( TextPointer );
414	end;
415	end;
416
417	#0: // ---- end of text found ----
418	ElementType := teTextEnd;
419
420	'<': // ---- tag found? ----
421	begin
422	NextChar := TextPointer^;
423	if NextChar = '<' then
424	begin
425	// no. just a literal <
426	ElementType := teText;
427	inc( TextPointer );
428	end
429	else
430	begin
431	Tag := ExtractTag( TextPointer );
432	if Tag.TagType = ttImage then
433	ElementType := teImage
434	else
435	ElementType := teStyle;
436	end;
437
438	end;
439
440	'>': // check - should be double
441	begin
442	ElementType := teText;
443	NextChar := TextPointer^;
444	if NextChar = '>' then
445	inc( TextPointer );
446	end;
447
448	// '-': // ---- Hyphen (ALT)
449	// ElementType := teWrapChar;
450
451	else
452	ElementType := teText;
453	end;
454
455	end; // with
456
457	NextElement := TextPointer;
458	end;
459
460	function ExtractPreviousTextElement( const TextStart: PChar;
461	TextPointer: PChar;
462	Var NextElement: PChar ): TTextElement;
463	var
464	TheChar: Char;
465	PreviousChar: Char;
466	FoundTag: boolean;
467	begin
468	with Result do
469	begin
470	dec( TextPointer );
471	TheChar := TextPointer^;
472	Character := TheChar;
473	if TextPointer < TextStart then
474	begin
475	ElementType := teTextEnd;
476	exit;
477	end;
478
479	case TheChar of
480	' ': // ---- Space (word break) found ----
481	ElementType := teWordBreak;
482
483	#10, #13: // ---- End of line found ----
484	begin
485	ElementType := teLineBreak;
486	if TheChar = #10 then
487	begin
488	dec( TextPointer );
489	TheChar := TextPointer^;
490	if TheChar = #13 then
491	begin
492	// skip CR preceeding LF
493	end
494	else
495	inc( TextPointer );
496	end;
497	end;
498
499	'>': // ---- tag found ----
500	begin
501	FoundTag := true;
502	if TextPointer > TextStart then
503	begin
504	PreviousChar := ( TextPointer - 1 )^;
505	if PreviousChar = '>' then
506	begin
507	// no. just a literal >
508	FoundTag := false;
509	ElementType := teText;
510	dec( TextPointer );
511	end
512	end;
513
514	if FoundTag then
515	begin
516	Tag := ExtractPreviousTag( TextStart, TextPointer );
517	if Tag.TagType = ttImage then
518	ElementType := teImage
519	else
520	ElementType := teStyle;
521	end;
522	end;
523
524	'<': // should be double
525	begin
526	ElementType := teText;
527	if TextPointer > TextStart then
528	begin
529	PreviousChar := TextPointer^;
530	if PreviousChar = '<' then
531	dec( TextPointer );
532	end;
533	end
534
535	// '-': // ---- Hyphen (ALT)
536	// ElementType := teWrapChar;
537
538	else
539	ElementType := teText;
540	end;
541	end; // with
542	NextElement := TextPointer;
543	end;
544
545	function GetTagColor( const ColorParam: string;
546	var Color: TColor ): boolean;
547	var
548	ColorIndex: longint;
549	begin
550	Result := false;
551	if ColorParam <> '' then
552	begin
553	if ColorParam[ 1 ] = '#' then
554	begin
555	try
556	Color := HexToInt( StrRightFrom( ColorParam, 2 ) );
557	Result := true;
558	except
559	end;
560	end
561	else
562	begin
563	for ColorIndex := 0 to High( StandardColors ) do
564	begin
565	if StringsSame( ColorParam, StandardColors[ ColorIndex ].Name ) then
566	begin
567	Color := StandardColors[ ColorIndex ].Color;
568	Result := true;
569	break;
570	end;
571	end;
572	end;
573	end;
574	end;
575
576	function GetTagTextAlignment( const AlignParam: string;
577	const Default: TTextAlignment ): TTextAlignment;
578	begin
579	if StringsSame( AlignParam, 'left' ) then
580	Result := taLeft
581	else if StringsSame( AlignParam, 'center' ) then
582	Result := taCenter
583	else if StringsSame( AlignParam, 'right' ) then
584	Result := taRight
585	else
586	Result := Default;
587	end;
588
589	function GetTagTextWrap( const WrapParam: string ): boolean;
590	begin
591	Result := StringsSame( WrapParam, 'yes' );
592	end;
593
594	function RichTextFindString( pRichText: PChar;
595	const Text: string;
596	var pMatch: PChar;
597	var MatchLength: longint ): boolean;
598	var
599	P: PChar;
600	NextP: PChar;
601	Element: TTextElement;
602	pMatchStart: pchar;
603	pMatchStartNext: pchar;
604	MatchIndex: longint;
605
606	CountryData: COUNTRYCODE;
607	CaseMap: array[ Low( Char )..High( Char ) ] of char;
608	C: Char;
609	begin
610	if Length( Text ) = 0 then
611	begin
612	// null string always matches
613	Result := true;
614	pMatch := pRichText;
615	MatchLength := 0;
616	exit;
617	end;
618
619	P := pRichText;
620
621	MatchIndex := 1;
622
623	// Get case mapping of all chars (only SBCS)
624
625	CountryData.Country := 0; // default country
626	CountryData.CodePage := 0; // default codepage
627
628	// fill array with all chars
629	for C := Low( CaseMap ) to High( CaseMap ) do
630	CaseMap[ C ] := C;
631
632	DosMapCase( sizeof( CaseMap ),
633	CountryData,
634	CaseMap );
635
636	// Now search, case insensitively
637
638	while true do
639	begin
640	Element := ExtractNextTextElement( P, NextP );
641
642	case Element.ElementType of
643	teTextEnd:
644	// end of text
645	break;
646
647	teImage,
648	teLineBreak:
649	// breaks a potential match
650	MatchIndex := 1;
651
652	teStyle:
653	; // ignore, matches can continue
654
655	else
656	begin
657	if CaseMap[ Element.Character ]
658	= CaseMap[ Text[ MatchIndex ] ] then
659	begin
660	// found a match
661	if MatchIndex = 1 then
662	begin
663	pMatchStart := P; // store start of match
664	pMatchStartNext := NextP;
665	end;
666
667	inc( MatchIndex );
668	if MatchIndex > Length( Text ) then
669	begin
670	// found a complete match
671	Result := true;
672	pMatch := pMatchStart;
673	MatchLength := PCharDiff( P, pMatchStart )
674	+ 1; // include this char
675	exit;
676	end;
677	end
678	else
679	begin
680	// not a match
681	if MatchIndex > 1 then
682	begin
683	// go back to start of match, + 1
684	NextP := pMatchStartNext;
685	MatchIndex := 1;
686	end;
687	end;
688	end;
689	end;
690
691	P := NextP;
692	end;
693
694	// no match found
695	Result := false;
696	pMatch := nil;
697	MatchLength := 0;
698	end;
699
700	function RichTextWordLeft( pRichText: PChar;
701	pStart: PChar ): PChar;
702	Var
703	P: PChar;
704	NextP: PChar;
705	Element: TTextElement;
706	begin
707	P := pStart;
708
709	// skip whitespace/tags...
710	Element := ExtractPreviousTextElement( pRichText, P, NextP );
711	P := NextP;
712	while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
713	begin
714	Element := ExtractPreviousTextElement( pRichText, P, NextP );
715	P := NextP;
716	end;
717	if Element.ElementType = teTextEnd then
718	begin
719	Result := P;
720	// out of text
721	exit;
722	end;
723
724	// back to start of word, skip text/tags
725	while true do
726	begin
727	Element := ExtractPreviousTextElement( pRichText, P, NextP );
728	if not ( Element.ElementType in [ teText, teStyle ] ) then
729	break;
730	P := NextP;
731	end;
732	Result := P;
733	end;
734
735	function RichTextWordRight( pStart: PChar ): PChar;
736	Var
737	P: PChar;
738	NextP: PChar;
739	Element: TTextElement;
740	begin
741	P := pStart;
742
743	// skip text/tags...
744	Element := ExtractNextTextElement( P, NextP );
745	while Element.ElementType in [ teStyle, teText ] do
746	begin
747	P := NextP;
748	Element := ExtractNextTextElement( P, NextP );
749	end;
750	if Element.ElementType <> teTextEnd then
751	begin
752	// skip whitespace
753	Element := ExtractNextTextElement( P, NextP );
754	while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
755	begin
756	P := NextP;
757	Element := ExtractNextTextElement( P, NextP );
758	end;
759	end;
760
761	Result := P;
762	end;
763
764	function RichTextWordAt( pRichText: PChar;
765	pStart: PChar;
766	Var pWordStart: PChar;
767	Var WordLength: longint ): boolean;
768	Var
769	P: PChar;
770	NextP: PChar;
771	Element: TTextElement;
772	pWordEnd: PChar;
773	begin
774	P := pStart;
775	Element := ExtractNextTextElement( P, NextP );
776	if not ( Element.ElementType in [ teStyle, teText ] ) then
777	begin
778	// not in a word.
779	result := false;
780	pWordStart := nil;
781	WordLength := 0;
782	exit;
783	end;
784	// find end of the word
785	while Element.ElementType in [ teStyle, teText ] do
786	begin
787	P := NextP;
788	Element := ExtractNextTextElement( P, NextP );
789	end;
790	pWordEnd := P;
791
792	P := pStart;
793	Element := ExtractPreviousTextElement( pRichText, P, NextP );
794	while Element.ElementType in [ teStyle, teText ] do
795	begin
796	P := NextP;
797	Element := ExtractPreviousTextElement( pRichText, P, NextP );
798	end;
799	pWordStart := P;
800	WordLength := PCharDiff( pWordEnd, pWordStart );
801	Result := true;
802	end;
803
804	function CopyPlainTextToBuffer( StartP: PChar;
805	EndP: PChar;
806	Buffer: PChar;
807	BufferLength: longint ): longint;
808	var
809	Q: PChar;
810	EndQ: Pchar;
811	P: PChar;
812	NextP: PChar;
813	Element: TTextElement;
814	begin
815	P := StartP;
816	Q := Buffer;
817	EndQ := Buffer + BufferLength;
818
819	while P < EndP do
820	begin
821	Element := ExtractNextTextElement( P, NextP );
822	case Element.ElementType of
823	teText, teWordBreak:
824	begin
825	// copy char
826	if Buffer <> nil then
827	Q[ 0 ] := Element.Character;
828	inc( Q );
829	end;
830
831	teLineBreak:
832	begin
833	if Buffer <> nil then
834	Q[ 0 ] := #13;
835	inc( Q );
836	if Q = EndQ then
837	// end of buffer
838	break;
839
840	if Buffer <> nil then
841	Q[ 0 ] := #10;
842	inc( Q );
843	end;
844	end;
845
846	if Q = EndQ then
847	// end of buffer
848	break;
849
850	P := NextP;
851	end;
852	result := PCharDiff( Q, Buffer );
853	end;
854
855	// ALT begins
856	//
857	// Check for special text element types that depend on context.
858	//
859	procedure CheckSpecialElementType( const Character: Char;
860	var ElementType: TTextElementType;
861	var InsideDBC: Boolean;
862	const Codepage: LongInt );
863	var
864	CharByte: Byte;
865	begin
866	if Codepage in [ 874, 932, 936, 942, 943, 949, 950, 1381, 1386 ] then
867	begin
868	CharByte := ord( Character );
869	if InsideDBC then
870	begin
871	InsideDBC := false;
872	// sanity check for corrupt text sequence (definitely not foolproof)
873	if IsDBCSSecondByte( CharByte, Codepage ) then
874	ElementType := teSecondByte
875	else
876	ElementType := teText;
877	end
878	else
879	begin
880	if IsAsianWrapChar( CharByte, Codepage ) then
881	begin
882	ElementType := teWrapChar;
883	InsideDBC := false;
884	end
885	else if IsDBCSLeadByte( CharByte, Codepage ) then
886	begin
887	ElementType := teLeadByte;
888	InsideDBC := true;
889	end;
890	end;
891	end;
892	end;
893
894	// Check if this (single-byte) character is a legal wrap point under certain
895	// Asian codepages. This is really only used for Thai and for Japanese
896	// half-width katakana; other DBCS languages use double-byte characters for all
897	// their native glyphs.
898	//
899	function IsAsianWrapChar( const CharByte: Byte;
900	const Codepage: LongInt ): boolean;
901	begin
902	Result := false;
903
904	if ( CharByte < $80) then
905	exit;
906
907	case Codepage of
908	932, 942, 943: // Japanese
909	if CharByte in [ $A2, $A6, $B1..$DD ] then
910	Result := true;
911	874: // Thai
912	Result := true;
913	end;
914	end;
915
916	// Check if this is the lead byte of a double-byte character. This is essential
917	// to know in certain cases:
918	// - Nothing must ever be inserted between such a byte and the next byte
919	// (e.g. line break, tag, etc).
920	// - Cursor position must never be set between such a byte and the next byte.
921	// - Selection state must never change between such a byte and the next byte.
922	//
923	function IsDBCSLeadByte( const CharByte: Byte;
924	const Codepage: LongInt ): boolean;
925	begin
926	Result := false;
927
928	case Codepage of
929	932, 942, 943: // Japanese
930	if CharByte in [ $81..$9F, $E0..$FC ] then
931	Result := true;
932	949: // Korean KSC
933	if CharByte in [ $85..$FE ] then
934	Result := true;
935	1381: // Chinese GB2312
936	if CharByte in [ $8C..$FE ] then
937	Result := true;
938	936, 950, 1386: // Chinese BIG-5 or GBK
939	if CharByte in [ $81..$FE ] then
940	Result := true;
941	end;
942	end;
943
944	// Check to see if this byte is a valid second byte in a double-byte character.
945	// (This doesn't guarantee that it IS such a byte, only that it COULD be. The
946	// caller is assumed to know whether we're in a double byte character or not.)
947	//
948	function IsDBCSSecondByte( const CharByte: Byte;
949	const Codepage: LongInt ): boolean;
950	begin
951	Result := false;
952
953	case Codepage of
954	932, 936, 942, 943, 949, 950, 1386:
955	if CharByte >= $40 then
956	Result := true;
957	1381:
958	if CharByte >= $A1 then
959	Result := true;
960	end;
961	end;
962
963	// Given a string position, check to see if it's in the middle of a double-byte
964	// character; if so, move back by one position so that we're sitting immediately
965	// in front of the double-byte character instead.
966	//
967	procedure MoveToCharacterBoundary( TextPointer: PChar;
968	var Index: LongInt;
969	var Offset: LongInt;
970	RowStart: LongInt;
971	Codepage: LongInt;
972	Advance: Boolean; );
973	var
974	P: PChar;
975	NextP: PChar;
976	Element: TTextElement;
977	InsideDBC: boolean;
978	begin
979	if ( Offset > 0 ) and
980	( Codepage in [ 932, 936, 942, 943, 949, 950, 1381, 1386 ]) then
981	begin
982	P := TextPointer + RowStart;
983	InsideDBC := false;
984
985	// Because parsing of byte types is state based, we must verify every
986	// byte's type from the beginning of the line until we reach the target.
987	while RowStart < Index do
988	begin
989	Element := ExtractNextTextElement( P, NextP );
990	CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );
991	P := NextP;
992	inc( RowStart );
993	end;
994
995	// We've reached the target position, and the current parsing state should
996	// be correctly set. So now we can safely determine the target byte's type.
997	Element := ExtractNextTextElement( P, NextP );
998	CheckSpecialElementType( Element.Character, Element.ElementType, InsideDBC, Codepage );
999	if InsideDBC then
1000	begin
1001	// If this the first byte of a double byte character, move position by one.
1002	if Advance Then
1003	Begin
1004	inc( Index );
1005	inc( Offset );
1006	End
1007	Else
1008	Begin
1009	dec( Index );
1010	dec( Offset );
1011	End
1012	end;
1013	end;
1014
1015	end;
1016	//
1017	// ALT ends
1018
1019	Initialization
1020	End.

Note: See TracBrowser for help on using the repository browser.

Download in other formats: