Context Navigation

source: trunk/Components/RichTextDocumentUnit.pas@ 423

Last change on this file since 423 was 423, checked in by ataylor, 6 years ago
Updated release and dev notes. New reformatted readme.txt based on Aaron's original. Minor cleanup.
Property svn:eol-style set to `native`
File size: 23.4 KB

Rev	Line
[15]	1	Unit RichTextDocumentUnit;
	2
	3	// Declarations of tags, and parsing functions
	4
	5	Interface
	6
	7	uses
	8	Classes;
	9
	10	type
	11	TTagType = ( ttInvalid,
	12	ttBold, ttBoldOff,
	13	ttItalic, ttItalicOff,
	14	ttUnderline, ttUnderlineOff,
	15	ttFixedWidthOn, ttFixedWidthOff,
	16	ttHeading1, ttHeading2, ttHeading3, ttHeadingOff,
	17	ttColor, ttColorOff,
	18	ttBackgroundColor, ttBackgroundColorOff,
	19	ttRed, ttBlue, ttGreen, ttBlack,
	20	ttWrap,
	21	ttAlign,
	22	ttBeginLink, ttEndLink,
	23	ttSetLeftMargin, ttSetRightMargin,
	24	ttImage,
	25	ttFont, ttFontOff,
	26	ttEnd );
	27
	28	TStandardColor = record
	29	Name: string[ 32 ];
	30	Color: TColor;
	31	end;
	32
	33	TTag = record
	34	TagType: TTagType;
	35	Arguments: string;
	36	end;
	37
[418]	38	TTextElementType = ( teText, // a character
[15]	39	teWordBreak,
[418]	40	teLineBreak, // end of para
[15]	41	teTextEnd,
	42	teImage,
[418]	43	teStyle,
	44	teWrapChar, // A non-whitespace wrappable character (ALT)
	45	teLeadByte, // DBCS lead byte (ALT)
	46	teSecondByte ); // DBCS secondary byte (ALT)
[15]	47
	48	TTextElement = record
	49	ElementType: TTextElementType;
	50	Character: Char;
	51	Tag: TTag;
	52	end;
	53
	54	TTextAlignment = ( taLeft,
	55	taRight,
	56	taCenter );
	57
	58	// Returns tag pointed to by TextPointer and
	59	// moves TextPointer to the first char after the tag.
	60	Function ExtractTag( Var TextPointer: PChar ): TTag;
	61
	62	// Returns tag ending at TextPointer
	63	// (Expects textpointer is currently pointing at the >)
	64	// and moves TextPointer to the first char of the tag
	65	Function ExtractPreviousTag( const TextStart: PChar;
	66	Var TextPointer: PChar ): TTag;
	67
	68	function ExtractNextTextElement( TextPointer: PChar;
	69	Var NextElement: PChar ): TTextElement;
	70
	71	function ExtractPreviousTextElement( const TextStart: PChar;
	72	TextPointer: PChar;
	73	Var NextElement: PChar ): TTextElement;
	74
	75	// Parse a color name or value (#hexval). Returns true if valid
	76	function GetTagColor( const ColorParam: string;
	77	var Color: TColor ): boolean;
	78
	79	function GetTagTextAlignment( const AlignParam: string;
	80	const Default: TTextAlignment ): TTextAlignment;
	81
	82	function GetTagTextWrap( const WrapParam: string ): boolean;
	83
	84	// Search within a rich text document for the given text
	85	// if found, returns true, pMatch is set to the first match,
	86	// and MatchLength returns the length of the match
	87	// (which may be greater than the length of Text due to
	88	// skipping tags)
	89	// if not found, returns false, pMatch is set to nil
	90	function RichTextFindString( pRichText: PChar;
	91	const Text: string;
	92	var pMatch: PChar;
	93	var MatchLength: longint ): boolean;
	94
	95	// Returns the start of the previous word,
	96	// or the current word if pStart is in the middle of the word
	97	function RichTextWordLeft( pRichText: PChar;
	98	pStart: PChar ): PChar;
	99
	100	// Returns the start of the next word.
	101	function RichTextWordRight( pStart: PChar ): PChar;
	102
	103	// If pStart is in the middle of a word, then
	104	// returns true and sets the start and length of the word
	105	function RichTextWordAt( pRichText: PChar;
	106	pStart: PChar;
	107	Var pWordStart: PChar;
	108	Var WordLength: longint ): boolean;
	109
	110	// Copies plaintext of richtext starting at StartP
	111	// to the given buffer. Returns number of characters copied.
	112	// Buffer may be nil
	113	// If BufferLength is negative, it is effectively ignored
	114	function CopyPlainTextToBuffer( StartP: PChar;
	115	EndP: PChar;
	116	Buffer: PChar;
	117	BufferLength: longint ): longint;
	118
[418]	119
	120	// ALT begins
	121	//
	122
	123	// Check for special text element types and adjust value accordingly.
	124	procedure CheckSpecialElementType( const Character: Char;
	125	var ElementType: TTextElementType;
	126	var InsideDBC: Boolean;
	127	const Codepage: LongInt );
	128
	129	// Returns true if the given byte value is a legally-wrappable single-byte
	130	// character under the given Asian codepage.
	131	function IsAsianWrapChar( const CharByte: Byte;
	132	const Codepage: LongInt ): boolean;
	133
	134	// Returns true if the given byte value is the leading byte of a multi-byte
	135	// character under the given Asian codepage.
	136	function IsDBCSLeadByte( const CharByte: Byte;
	137	const Codepage: LongInt ): boolean;
	138
	139	// Returns true if the given byte value is valid as a possible second byte of
	140	// a multi-byte character (this does not guarantee that it IS one, just that
	141	// it COULD be).
	142	function IsDBCSSecondByte( const CharByte: Byte;
	143	const Codepage: LongInt ): boolean;
	144	//
	145	// ALT ends
	146
	147
[15]	148	Implementation
	149
	150	uses
	151	BseDOS, // for NLS/case mapping
[405]	152	SysUtils,
	153	ACLStringUtility;
[15]	154
	155	const
	156	TagStr: array[ ttInvalid .. ttEnd ] of string =
	157	(
	158	'', //
	159	'b',
	160	'/b',
	161	'i',
	162	'/i',
	163	'u',
	164	'/u',
	165	'tt',
	166	'/tt',
	167	'h1',
	168	'h2',
	169	'h3',
	170	'/h',
	171	'color',
	172	'/color',
	173	'backcolor',
	174	'/backcolor',
	175	'red',
	176	'blue',
	177	'green',
	178	'black',
	179	'wrap',
	180	'align',
	181	'link',
	182	'/link',
	183	'leftmargin',
	184	'rightmargin',
	185	'image',
	186	'font',
	187	'/font',
	188	''
	189	);
	190
	191	StandardColors: array[ 0..7 ] of TStandardColor =
	192	(
	193	( Name : 'white' ; Color: clWhite ),
	194	( Name : 'black' ; Color: clBlack ),
	195	( Name : 'red' ; Color: clRed ),
	196	( Name : 'blue' ; Color: clBlue ),
	197	( Name : 'green' ; Color: clLime ),
	198	( Name : 'purple'; Color: clFuchsia ),
	199	( Name : 'yellow'; Color: clYellow ),
	200	( Name : 'cyan' ; Color: clAqua )
	201	);
	202
	203	Procedure ParseTag( const Text: string;
	204	Var Tag: TTag );
	205	var
	206	TagType: TTagType;
	207	TagTypeText: string;
	208	SpacePos: longint;
	209	begin
	210	SpacePos := Pos( ' ', Text );
	211	if SpacePos <> 0 then
	212	begin
	213	Tag.Arguments := trim( Copy( Text, SpacePos + 1, 255 ) );
	214	TagTypeText := LowerCase( Copy( Text, 1, SpacePos - 1 ) );
	215	end
	216	else
	217	begin
	218	Tag.Arguments := ''; // to save time copying when not needed
	219	TagTypeText := LowerCase( Text );
	220	end;
	221
	222	for TagType := ttBold to ttEnd do
	223	begin
	224	if TagStr[ TagType ] = TagTypeText then
	225	begin
	226	Tag.TagType := TagType;
	227	exit;
	228	end;
	229	end;
	230
	231	// not found
	232	Tag.TagType := ttInvalid;
	233	end;
	234
	235	var
	236	TagText: string;
	237	TagArgText: string;
	238
	239	Function ExtractTag( Var TextPointer: PChar ): TTag;
	240	var
	241	CurrentChar: Char;
	242	TagTooLong: boolean;
	243	InQuote: boolean;
	244	begin
	245	// assert( TextPointer[ 0 ] = '<' );
	246	TagText := '';
	247	TagTooLong := false;
	248	InQuote := false;
	249
	250	repeat
	251	CurrentChar := TextPointer^;
	252
	253	if ( CurrentChar = '>' )
	254	and ( not InQuote ) then
	255	begin
	256	// found tag end.
	257	if TagTooLong then
	258	Result.TagType := ttInvalid
	259	else
	260	ParseTag( TagText, Result );
	261	inc( TextPointer );
	262	exit;
	263	end;
	264
	265	if CurrentChar = #0 then
	266	begin
	267	// if we reach here we have reached the end of text
	268	// during a tag. invalid tag.
	269	Result.TagType := ttInvalid;
	270	exit;
	271	end;
	272
	273	if CurrentChar = DoubleQuote then
	274	begin
	275	if not InQuote then
	276	begin
	277	InQuote := true
	278	end
	279	else
	280	begin
	281	// Could be escaped quote ""
	282	if ( TextPointer + 1 ) ^ = DoubleQuote then
	283	begin
	284	// yes it is
	285	inc( TextPointer ); // skip second one
	286	end
	287	else
	288	begin
	289	// no, not an escaped quote
	290	InQuote := false;
	291	end;
	292	end;
	293
	294	end;
	295
	296	if not TagTooLong then
	297	if Length( TagText ) < 200 then
	298	TagText := TagText + CurrentChar
	299	else
	300	TagTooLong := true; // but keep going until the end
	301
	302	inc( TextPointer );
	303	until false;
	304
	305	end;
	306
	307	// Expects textpointer is currently pointing at the >
	308	Function ExtractPreviousTag( const TextStart: PChar;
	309	Var TextPointer: PChar ): TTag;
	310	var
	311	CurrentChar: Char;
	312	TagTooLong: boolean;
	313	InQuote: boolean;
	314	begin
	315	TagText := '';
	316	TagTooLong := false;
	317	InQuote := false;
	318
	319	repeat
	320	dec( TextPointer );
	321	if TextPointer < TextStart then
	322	begin
	323	// if we reach here we have reached the end of text
	324	// during a tag. invalid tag.
	325	Result.TagType := ttInvalid;
	326	exit;
	327	end;
	328	CurrentChar := TextPointer^;
	329
	330	if ( CurrentChar = '<' )
	331	and ( not InQuote ) then
	332	begin
	333	// found tag end.
	334	if TagTooLong then
	335	Result.TagType := ttInvalid
	336	else
	337	ParseTag( TagText, Result );
	338	exit;
	339	end;
	340
	341	if CurrentChar = DoubleQuote then
	342	begin
	343	if not InQuote then
	344	begin
	345	InQuote := true
	346	end
	347	else
	348	begin
	349	// Could be escaped quote ""
	350	if TextPointer <= TextStart then
	351	begin
	352	// start of text... somethin weird
	353	InQuote := false;
	354	end
	355	else if ( TextPointer - 1 ) ^ = DoubleQuote then
	356	begin
	357	// yes it is
	358	dec( TextPointer ); // skip second one
	359	end
	360	else
	361	begin
	362	// no, not an escaped quote
	363	InQuote := false;
	364	end;
	365	end;
	366
	367	end;
	368
	369	if not TagTooLong then
	370	if Length( TagText ) < 200 then
	371	TagText := CurrentChar + TagText
	372	else
	373	TagTooLong := true; // but keep going until the end
	374
	375	until false;
	376
	377	end;
	378
	379	function ExtractNextTextElement( TextPointer: PChar;
	380	Var NextElement: PChar ): TTextElement;
	381	var
	382	TheChar: Char;
[418]	383	NextChar: Char;
[15]	384	begin
	385	with Result do
	386	begin
	387	TheChar := TextPointer^;
	388	Character := TheChar;
	389	inc( TextPointer );
	390
[418]	391
[15]	392	case TheChar of
	393	' ': // ---- Space (word break) found ----
	394	ElementType := teWordBreak;
	395
	396	#10, #13: // ---- End of line found ----
	397	begin
	398	ElementType := teLineBreak;
	399	if TheChar = #13 then
	400	begin
	401	TheChar := TextPointer^;
	402	if TheChar = #10 then
	403	// skip CR following LF
	404	inc( TextPointer );
	405	end;
	406	end;
	407
	408	#0: // ---- end of text found ----
	409	ElementType := teTextEnd;
	410
	411	'<': // ---- tag found? ----
	412	begin
	413	NextChar := TextPointer^;
	414	if NextChar = '<' then
	415	begin
	416	// no. just a literal <
	417	ElementType := teText;
	418	inc( TextPointer );
	419	end
	420	else
	421	begin
	422	Tag := ExtractTag( TextPointer );
	423	if Tag.TagType = ttImage then
	424	ElementType := teImage
	425	else
	426	ElementType := teStyle;
	427	end;
	428
	429	end;
	430
	431	'>': // check - should be double
	432	begin
	433	ElementType := teText;
	434	NextChar := TextPointer^;
	435	if NextChar = '>' then
	436	inc( TextPointer );
	437	end;
	438
[418]	439	// '-': // ---- Hyphen (ALT)
	440	// ElementType := teWrapChar;
	441
[15]	442	else
	443	ElementType := teText;
	444	end;
[418]	445
[15]	446	end; // with
[418]	447
[15]	448	NextElement := TextPointer;
	449	end;
	450
	451	function ExtractPreviousTextElement( const TextStart: PChar;
	452	TextPointer: PChar;
	453	Var NextElement: PChar ): TTextElement;
	454	var
	455	TheChar: Char;
	456	PreviousChar: Char;
	457	FoundTag: boolean;
	458	begin
	459	with Result do
	460	begin
	461	dec( TextPointer );
	462	TheChar := TextPointer^;
	463	Character := TheChar;
	464	if TextPointer < TextStart then
	465	begin
	466	ElementType := teTextEnd;
	467	exit;
	468	end;
	469
	470	case TheChar of
	471	' ': // ---- Space (word break) found ----
	472	ElementType := teWordBreak;
	473
	474	#10, #13: // ---- End of line found ----
	475	begin
	476	ElementType := teLineBreak;
	477	if TheChar = #10 then
	478	begin
	479	dec( TextPointer );
	480	TheChar := TextPointer^;
	481	if TheChar = #13 then
	482	begin
	483	// skip CR preceeding LF
	484	end
	485	else
	486	inc( TextPointer );
	487	end;
	488	end;
	489
	490	'>': // ---- tag found ----
	491	begin
	492	FoundTag := true;
	493	if TextPointer > TextStart then
	494	begin
	495	PreviousChar := ( TextPointer - 1 )^;
	496	if PreviousChar = '>' then
	497	begin
	498	// no. just a literal >
	499	FoundTag := false;
	500	ElementType := teText;
	501	dec( TextPointer );
	502	end
	503	end;
	504
	505	if FoundTag then
	506	begin
	507	Tag := ExtractPreviousTag( TextStart, TextPointer );
	508	if Tag.TagType = ttImage then
	509	ElementType := teImage
	510	else
	511	ElementType := teStyle;
	512	end;
	513	end;
	514
	515	'<': // should be double
	516	begin
	517	ElementType := teText;
	518	if TextPointer > TextStart then
	519	begin
	520	PreviousChar := TextPointer^;
	521	if PreviousChar = '<' then
	522	dec( TextPointer );
	523	end;
	524	end
[418]	525
	526	// '-': // ---- Hyphen (ALT)
	527	// ElementType := teWrapChar;
	528
[15]	529	else
	530	ElementType := teText;
	531	end;
	532	end; // with
	533	NextElement := TextPointer;
	534	end;
	535
	536	function GetTagColor( const ColorParam: string;
	537	var Color: TColor ): boolean;
	538	var
	539	ColorIndex: longint;
	540	begin
	541	Result := false;
	542	if ColorParam <> '' then
	543	begin
	544	if ColorParam[ 1 ] = '#' then
	545	begin
	546	try
	547	Color := HexToInt( StrRightFrom( ColorParam, 2 ) );
	548	Result := true;
	549	except
	550	end;
	551	end
	552	else
	553	begin
	554	for ColorIndex := 0 to High( StandardColors ) do
	555	begin
	556	if StringsSame( ColorParam, StandardColors[ ColorIndex ].Name ) then
	557	begin
	558	Color := StandardColors[ ColorIndex ].Color;
	559	Result := true;
	560	break;
	561	end;
	562	end;
	563	end;
	564	end;
	565	end;
	566
	567	function GetTagTextAlignment( const AlignParam: string;
	568	const Default: TTextAlignment ): TTextAlignment;
	569	begin
	570	if StringsSame( AlignParam, 'left' ) then
	571	Result := taLeft
	572	else if StringsSame( AlignParam, 'center' ) then
	573	Result := taCenter
	574	else if StringsSame( AlignParam, 'right' ) then
	575	Result := taRight
	576	else
	577	Result := Default;
	578	end;
	579
	580	function GetTagTextWrap( const WrapParam: string ): boolean;
	581	begin
	582	Result := StringsSame( WrapParam, 'yes' );
	583	end;
	584
	585	function RichTextFindString( pRichText: PChar;
	586	const Text: string;
	587	var pMatch: PChar;
	588	var MatchLength: longint ): boolean;
	589	var
	590	P: PChar;
	591	NextP: PChar;
	592	Element: TTextElement;
	593	pMatchStart: pchar;
	594	pMatchStartNext: pchar;
	595	MatchIndex: longint;
	596
	597	CountryData: COUNTRYCODE;
	598	CaseMap: array[ Low( Char )..High( Char ) ] of char;
	599	C: Char;
	600	begin
	601	if Length( Text ) = 0 then
	602	begin
	603	// null string always matches
	604	Result := true;
	605	pMatch := pRichText;
	606	MatchLength := 0;
	607	exit;
	608	end;
	609
	610	P := pRichText;
	611
	612	MatchIndex := 1;
	613
	614	// Get case mapping of all chars (only SBCS)
	615
	616	CountryData.Country := 0; // default country
	617	CountryData.CodePage := 0; // default codepage
	618
	619	// fill array with all chars
	620	for C := Low( CaseMap ) to High( CaseMap ) do
	621	CaseMap[ C ] := C;
	622
	623	DosMapCase( sizeof( CaseMap ),
	624	CountryData,
	625	CaseMap );
	626
	627	// Now search, case insensitively
	628
	629	while true do
	630	begin
	631	Element := ExtractNextTextElement( P, NextP );
	632
	633	case Element.ElementType of
	634	teTextEnd:
	635	// end of text
	636	break;
	637
	638	teImage,
	639	teLineBreak:
	640	// breaks a potential match
	641	MatchIndex := 1;
	642
	643	teStyle:
	644	; // ignore, matches can continue
	645
	646	else
	647	begin
	648	if CaseMap[ Element.Character ]
	649	= CaseMap[ Text[ MatchIndex ] ] then
	650	begin
	651	// found a match
	652	if MatchIndex = 1 then
	653	begin
	654	pMatchStart := P; // store start of match
	655	pMatchStartNext := NextP;
	656	end;
	657
	658	inc( MatchIndex );
	659	if MatchIndex > Length( Text ) then
	660	begin
	661	// found a complete match
	662	Result := true;
	663	pMatch := pMatchStart;
	664	MatchLength := PCharDiff( P, pMatchStart )
	665	+ 1; // include this char
	666	exit;
	667	end;
	668	end
	669	else
	670	begin
	671	// not a match
	672	if MatchIndex > 1 then
	673	begin
	674	// go back to start of match, + 1
	675	NextP := pMatchStartNext;
	676	MatchIndex := 1;
	677	end;
	678	end;
	679	end;
	680	end;
	681
	682	P := NextP;
	683	end;
	684
	685	// no match found
	686	Result := false;
	687	pMatch := nil;
	688	MatchLength := 0;
	689	end;
	690
	691	function RichTextWordLeft( pRichText: PChar;
	692	pStart: PChar ): PChar;
	693	Var
	694	P: PChar;
	695	NextP: PChar;
	696	Element: TTextElement;
	697	begin
	698	P := pStart;
	699
	700	// skip whitespace/tags...
	701	Element := ExtractPreviousTextElement( pRichText, P, NextP );
	702	P := NextP;
	703	while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
	704	begin
	705	Element := ExtractPreviousTextElement( pRichText, P, NextP );
	706	P := NextP;
	707	end;
	708	if Element.ElementType = teTextEnd then
	709	begin
	710	Result := P;
	711	// out of text
	712	exit;
	713	end;
	714
	715	// back to start of word, skip text/tags
	716	while true do
	717	begin
	718	Element := ExtractPreviousTextElement( pRichText, P, NextP );
	719	if not ( Element.ElementType in [ teText, teStyle ] ) then
	720	break;
	721	P := NextP;
	722	end;
	723	Result := P;
	724	end;
	725
	726	function RichTextWordRight( pStart: PChar ): PChar;
	727	Var
	728	P: PChar;
	729	NextP: PChar;
	730	Element: TTextElement;
	731	begin
	732	P := pStart;
	733
	734	// skip text/tags...
	735	Element := ExtractNextTextElement( P, NextP );
	736	while Element.ElementType in [ teStyle, teText ] do
	737	begin
	738	P := NextP;
	739	Element := ExtractNextTextElement( P, NextP );
	740	end;
	741	if Element.ElementType <> teTextEnd then
	742	begin
	743	// skip whitespace
	744	Element := ExtractNextTextElement( P, NextP );
	745	while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
	746	begin
	747	P := NextP;
	748	Element := ExtractNextTextElement( P, NextP );
	749	end;
	750	end;
	751
	752	Result := P;
	753	end;
	754
	755	function RichTextWordAt( pRichText: PChar;
	756	pStart: PChar;
	757	Var pWordStart: PChar;
	758	Var WordLength: longint ): boolean;
	759	Var
	760	P: PChar;
	761	NextP: PChar;
	762	Element: TTextElement;
	763	pWordEnd: PChar;
	764	begin
	765	P := pStart;
	766	Element := ExtractNextTextElement( P, NextP );
	767	if not ( Element.ElementType in [ teStyle, teText ] ) then
	768	begin
	769	// not in a word.
	770	result := false;
	771	pWordStart := nil;
	772	WordLength := 0;
	773	exit;
	774	end;
	775	// find end of the word
	776	while Element.ElementType in [ teStyle, teText ] do
	777	begin
	778	P := NextP;
	779	Element := ExtractNextTextElement( P, NextP );
	780	end;
	781	pWordEnd := P;
	782
	783	P := pStart;
	784	Element := ExtractPreviousTextElement( pRichText, P, NextP );
	785	while Element.ElementType in [ teStyle, teText ] do
	786	begin
	787	P := NextP;
	788	Element := ExtractPreviousTextElement( pRichText, P, NextP );
	789	end;
	790	pWordStart := P;
	791	WordLength := PCharDiff( pWordEnd, pWordStart );
	792	Result := true;
	793	end;
	794
	795	function CopyPlainTextToBuffer( StartP: PChar;
	796	EndP: PChar;
	797	Buffer: PChar;
	798	BufferLength: longint ): longint;
	799	var
	800	Q: PChar;
	801	EndQ: Pchar;
	802	P: PChar;
	803	NextP: PChar;
	804	Element: TTextElement;
	805	begin
	806	P := StartP;
	807	Q := Buffer;
	808	EndQ := Buffer + BufferLength;
	809
	810	while P < EndP do
	811	begin
	812	Element := ExtractNextTextElement( P, NextP );
	813	case Element.ElementType of
	814	teText, teWordBreak:
	815	begin
	816	// copy char
	817	if Buffer <> nil then
	818	Q[ 0 ] := Element.Character;
	819	inc( Q );
	820	end;
	821
	822	teLineBreak:
	823	begin
	824	if Buffer <> nil then
	825	Q[ 0 ] := #13;
	826	inc( Q );
	827	if Q = EndQ then
	828	// end of buffer
	829	break;
	830
	831	if Buffer <> nil then
	832	Q[ 0 ] := #10;
	833	inc( Q );
	834	end;
	835	end;
	836
	837	if Q = EndQ then
	838	// end of buffer
	839	break;
	840
	841	P := NextP;
	842	end;
[421]	843
[423]	844	Q[ 0 ] := #0; // ALT - make sure string is terminated
[421]	845
[15]	846	result := PCharDiff( Q, Buffer );
	847	end;
	848
[418]	849	// ALT begins
	850	//
	851	// Check for special text element types that depend on context.
	852	//
	853	procedure CheckSpecialElementType( const Character: Char;
	854	var ElementType: TTextElementType;
	855	var InsideDBC: Boolean;
	856	const Codepage: LongInt );
	857	var
	858	CharByte: Byte;
	859	begin
	860	if Codepage in [ 874, 932, 936, 942, 943, 949, 950, 1381, 1386 ] then
	861	begin
	862	CharByte := ord( Character );
	863	if InsideDBC then
	864	begin
	865	InsideDBC := false;
	866	// sanity check for corrupt text sequence (definitely not foolproof)
	867	if IsDBCSSecondByte( CharByte, Codepage ) then
	868	ElementType := teSecondByte
	869	else
	870	ElementType := teText;
	871	end
	872	else
	873	begin
	874	if IsAsianWrapChar( CharByte, Codepage ) then
	875	begin
	876	ElementType := teWrapChar;
	877	InsideDBC := false;
	878	end
	879	else if IsDBCSLeadByte( CharByte, Codepage ) then
	880	begin
	881	ElementType := teLeadByte;
	882	InsideDBC := true;
	883	end;
	884	end;
	885	end;
	886	end;
	887
	888	// Check if this (single-byte) character is a legal wrap point under certain
	889	// Asian codepages. This is really only used for Thai and for Japanese
	890	// half-width katakana; other DBCS languages use double-byte characters for all
	891	// their native glyphs.
	892	//
	893	function IsAsianWrapChar( const CharByte: Byte;
	894	const Codepage: LongInt ): boolean;
	895	begin
	896	Result := false;
	897
	898	if ( CharByte < $80) then
	899	exit;
	900
	901	case Codepage of
	902	932, 942, 943: // Japanese
	903	if CharByte in [ $A2, $A6, $B1..$DD ] then
	904	Result := true;
	905	874: // Thai
	906	Result := true;
	907	end;
	908	end;
	909
	910	// Check if this is the lead byte of a double-byte character. This is essential
	911	// to know in certain cases:
	912	// - Nothing must ever be inserted between such a byte and the next byte
	913	// (e.g. line break, tag, etc).
	914	// - Cursor position must never be set between such a byte and the next byte.
	915	// - Selection state must never change between such a byte and the next byte.
	916	//
	917	function IsDBCSLeadByte( const CharByte: Byte;
	918	const Codepage: LongInt ): boolean;
	919	begin
	920	Result := false;
	921
	922	case Codepage of
	923	932, 942, 943: // Japanese
	924	if CharByte in [ $81..$9F, $E0..$FC ] then
	925	Result := true;
	926	949: // Korean KSC
	927	if CharByte in [ $85..$FE ] then
	928	Result := true;
	929	1381: // Chinese GB2312
	930	if CharByte in [ $8C..$FE ] then
	931	Result := true;
	932	936, 950, 1386: // Chinese BIG-5 or GBK
	933	if CharByte in [ $81..$FE ] then
	934	Result := true;
	935	end;
	936	end;
	937
	938	// Check to see if this byte is a valid second byte in a double-byte character.
	939	// (This doesn't guarantee that it IS such a byte, only that it COULD be. The
	940	// caller is assumed to know whether we're in a double byte character or not.)
	941	//
	942	function IsDBCSSecondByte( const CharByte: Byte;
	943	const Codepage: LongInt ): boolean;
	944	begin
	945	Result := false;
	946
	947	case Codepage of
	948	932, 936, 942, 943, 949, 950, 1386:
	949	if CharByte >= $40 then
	950	Result := true;
	951	1381:
	952	if CharByte >= $A1 then
	953	Result := true;
	954	end;
	955	end;
	956	//
	957	// ALT ends
	958
[15]	959	Initialization
	960	End.

Note: See TracBrowser for help on using the repository browser.

Download in other formats: