Context Navigation

source: trunk/Components/RichTextDocumentUnit.pas@ 418

Last change on this file since 418 was 418, checked in by ataylor, 6 years ago
Experimental new logic to try and fix DBCS text wrapping.
Property svn:eol-style set to `native`
File size: 25.4 KB

Rev	Line
[15]	1	Unit RichTextDocumentUnit;
	2
	3	// Declarations of tags, and parsing functions
	4
	5	Interface
	6
	7	uses
	8	Classes;
	9
	type
	  // All tag kinds known to the parser. The TagStr table in the
	  // implementation section is indexed by this type, so the order here
	  // and the order there must be kept in step.
	  TTagType =
	  (
	    ttInvalid,
	    ttBold,            ttBoldOff,
	    ttItalic,          ttItalicOff,
	    ttUnderline,       ttUnderlineOff,
	    ttFixedWidthOn,    ttFixedWidthOff,
	    ttHeading1,        ttHeading2,       ttHeading3,   ttHeadingOff,
	    ttColor,           ttColorOff,
	    ttBackgroundColor, ttBackgroundColorOff,
	    ttRed,             ttBlue,           ttGreen,      ttBlack,
	    ttWrap,
	    ttAlign,
	    ttBeginLink,       ttEndLink,
	    ttSetLeftMargin,   ttSetRightMargin,
	    ttImage,
	    ttFont,            ttFontOff,
	    ttEnd
	  );

	  // One entry of the named-color lookup table (see StandardColors).
	  TStandardColor = record
	    Name: string[ 32 ];
	    Color: TColor;
	  end;

	  // A parsed tag: its kind plus whatever followed the tag name.
	  TTag = record
	    TagType: TTagType;
	    Arguments: string;
	  end;

	  // Classification of a single parsed element of rich text.
	  TTextElementType =
	  (
	    teText,         // a character
	    teWordBreak,
	    teLineBreak,    // end of para
	    teTextEnd,
	    teImage,
	    teStyle,
	    teWrapChar,     // A non-whitespace wrappable character (ALT)
	    teLeadByte,     // DBCS lead byte (ALT)
	    teSecondByte    // DBCS secondary byte (ALT)
	  );

	  // Result of parsing one element. Tag is only filled in by the parser
	  // for elements that came from a tag (teStyle / teImage).
	  TTextElement = record
	    ElementType: TTextElementType;
	    Character: Char;
	    Tag: TTag;
	  end;

	  TTextAlignment =
	  (
	    taLeft,
	    taRight,
	    taCenter
	  );
	57
	// Parses the tag that TextPointer refers to and returns it.
	// On return TextPointer has been moved to the first char after the tag.
	function ExtractTag( var TextPointer: PChar ): TTag;

	// Parses the tag that ends at TextPointer, scanning backwards.
	// TextPointer must be pointing at the closing '>' on entry; on return it
	// points at the first char of the tag. TextStart is the start of the
	// text and limits how far back the scan may go.
	function ExtractPreviousTag( const TextStart: PChar;
	                             var TextPointer: PChar ): TTag;

	// Parses the element starting at TextPointer; NextElement receives the
	// position immediately after it.
	function ExtractNextTextElement( TextPointer: PChar;
	                                 var NextElement: PChar ): TTextElement;

	// Parses the element immediately before TextPointer; NextElement
	// receives the position where that element starts.
	function ExtractPreviousTextElement( const TextStart: PChar;
	                                     TextPointer: PChar;
	                                     var NextElement: PChar ): TTextElement;

	// Converts a color name or a "#hexval" string to a color.
	// Returns true if ColorParam was valid.
	function GetTagColor( const ColorParam: string;
	                      var Color: TColor ): boolean;

	// Converts 'left' / 'center' / 'right' to an alignment; anything else
	// gives Default.
	function GetTagTextAlignment( const AlignParam: string;
	                              const Default: TTextAlignment ): TTextAlignment;

	// True when WrapParam is 'yes'.
	function GetTagTextWrap( const WrapParam: string ): boolean;

	// Searches a rich text document for Text.
	// Found:     returns true, pMatch is set to the first match and
	//            MatchLength to the length of the match (which may be
	//            greater than the length of Text because tags are skipped).
	// Not found: returns false and pMatch is set to nil.
	function RichTextFindString( pRichText: PChar;
	                             const Text: string;
	                             var pMatch: PChar;
	                             var MatchLength: longint ): boolean;

	// Returns the start of the previous word, or of the current word when
	// pStart is in the middle of a word.
	function RichTextWordLeft( pRichText: PChar;
	                           pStart: PChar ): PChar;

	// Returns the start of the next word.
	function RichTextWordRight( pStart: PChar ): PChar;

	// When pStart is inside a word, returns true and reports the start and
	// length of that word.
	function RichTextWordAt( pRichText: PChar;
	                         pStart: PChar;
	                         var pWordStart: PChar;
	                         var WordLength: longint ): boolean;

	// Copies the plain text of the rich text between StartP and EndP into
	// Buffer and returns the number of characters copied.
	// Buffer may be nil.
	// A negative BufferLength is effectively ignored.
	function CopyPlainTextToBuffer( StartP: PChar;
	                                EndP: PChar;
	                                Buffer: PChar;
	                                BufferLength: longint ): longint;


	// ALT begins
	//

	// Looks for special text element types and adjusts ElementType
	// accordingly.
	procedure CheckSpecialElementType( const Character: Char;
	                                   var ElementType: TTextElementType;
	                                   var InsideDBC: Boolean;
	                                   const Codepage: LongInt );

	// True if the byte value is a single-byte character that may legally be
	// wrapped at under the given Asian codepage.
	function IsAsianWrapChar( const CharByte: Byte;
	                          const Codepage: LongInt ): boolean;

	// True if the byte value is the leading byte of a multi-byte character
	// under the given Asian codepage.
	function IsDBCSLeadByte( const CharByte: Byte;
	                         const Codepage: LongInt ): boolean;

	// True if the byte value is acceptable as the second byte of a
	// multi-byte character. This only says that it COULD be one, not that
	// it IS one.
	function IsDBCSSecondByte( const CharByte: Byte;
	                           const Codepage: LongInt ): boolean;

	// Moves the character position back to the beginning of a multi-byte
	// character if it is currently inside one.
	procedure MoveToCharacterBoundary( TextPointer: PChar;
	                                   var Index: LongInt;
	                                   var Offset: LongInt;
	                                   RowStart: LongInt;
	                                   Codepage: LongInt );
	//
	// ALT ends
	153
	154
[15]	155	Implementation
	156
	157	uses
	158	BseDOS, // for NLS/case mapping
[405]	159	SysUtils,
	160	ACLStringUtility;
[15]	161
	const
	  // Tag names as they appear in the markup, one per TTagType value and
	  // in the same order. ParseTag compares against the lower-cased tag
	  // name, so every entry here is lower case.
	  TagStr: array[ ttInvalid .. ttEnd ] of string =
	  (
	    '',                          // ttInvalid
	    'b',          '/b',          // ttBold, ttBoldOff
	    'i',          '/i',          // ttItalic, ttItalicOff
	    'u',          '/u',          // ttUnderline, ttUnderlineOff
	    'tt',         '/tt',         // ttFixedWidthOn, ttFixedWidthOff
	    'h1', 'h2', 'h3', '/h',      // ttHeading1..3, ttHeadingOff
	    'color',      '/color',      // ttColor, ttColorOff
	    'backcolor',  '/backcolor',  // ttBackgroundColor, ttBackgroundColorOff
	    'red', 'blue', 'green', 'black', // ttRed, ttBlue, ttGreen, ttBlack
	    'wrap',                      // ttWrap
	    'align',                     // ttAlign
	    'link',       '/link',       // ttBeginLink, ttEndLink
	    'leftmargin', 'rightmargin', // ttSetLeftMargin, ttSetRightMargin
	    'image',                     // ttImage
	    'font',       '/font',       // ttFont, ttFontOff
	    ''                           // ttEnd
	  );

	  // Color names accepted by GetTagColor and the colors they stand for.
	  StandardColors: array[ 0..7 ] of TStandardColor =
	  (
	    ( Name: 'white';  Color: clWhite   ),
	    ( Name: 'black';  Color: clBlack   ),
	    ( Name: 'red';    Color: clRed     ),
	    ( Name: 'blue';   Color: clBlue    ),
	    ( Name: 'green';  Color: clLime    ),
	    ( Name: 'purple'; Color: clFuchsia ),
	    ( Name: 'yellow'; Color: clYellow  ),
	    ( Name: 'cyan';   Color: clAqua    )
	  );
	209
	// Splits the text found between '<' and '>' into a tag type and its
	// arguments.
	//   Text - tag contents without the angle brackets
	//   Tag  - receives the result; TagType is ttInvalid when the tag name
	//          is not one of the names in TagStr
	// The tag name is everything before the first space (matched without
	// regard to case); the arguments are the trimmed remainder.
	Procedure ParseTag( const Text: string;
	                    Var Tag: TTag );
	var
	  TagType: TTagType;
	  TagTypeText: string;
	  SpacePos: longint;
	begin
	  SpacePos := Pos( ' ', Text );
	  if SpacePos <> 0 then
	  begin
	    Tag.Arguments := trim( Copy( Text, SpacePos + 1, 255 ) );
	    TagTypeText := LowerCase( Copy( Text, 1, SpacePos - 1 ) );
	  end
	  else
	  begin
	    Tag.Arguments := ''; // to save time copying when not needed
	    TagTypeText := LowerCase( Text );
	  end;

	  // Only the real tags are searched. ttEnd is a sentinel whose table
	  // entry is the empty string; including it in the search made an empty
	  // tag name (e.g. "<>" or a tag starting with a space) come back as
	  // ttEnd rather than ttInvalid.
	  for TagType := ttBold to Pred( ttEnd ) do
	  begin
	    if TagStr[ TagType ] = TagTypeText then
	    begin
	      Tag.TagType := TagType;
	      exit;
	    end;
	  end;

	  // not found
	  Tag.TagType := ttInvalid;
	end;
	241
	var
	  // Unit-level scratch string in which ExtractTag and ExtractPreviousTag
	  // collect the characters of the tag being read.
	  TagText: string;
	  // Not referenced by any routine in this unit.
	  TagArgText: string;

	// Reads a tag forwards, starting at TextPointer, up to the closing '>'.
	// A '>' inside a double-quoted section does not end the tag, and two
	// consecutive double quotes inside such a section count as one escaped
	// quote. (ExtractNextTextElement calls this with TextPointer already
	// past the opening '<'.)
	//
	// On success TextPointer is left on the first char after the '>'.
	// The result is ttInvalid when the text ends before a '>' is found
	// (TextPointer is then left on the terminator) or when the tag is
	// longer than 200 characters.
	Function ExtractTag( Var TextPointer: PChar ): TTag;
	var
	  Ch: Char;
	  Overflowed: boolean;
	  Quoted: boolean;
	begin
	  TagText := '';
	  Overflowed := false;
	  Quoted := false;

	  while true do
	  begin
	    Ch := TextPointer^;

	    if Ch = #0 then
	    begin
	      // ran out of text in the middle of a tag
	      Result.TagType := ttInvalid;
	      exit;
	    end;

	    if ( Ch = '>' ) and ( not Quoted ) then
	    begin
	      // closing bracket: step over it and hand back the tag
	      inc( TextPointer );
	      if Overflowed then
	        Result.TagType := ttInvalid
	      else
	        ParseTag( TagText, Result );
	      exit;
	    end;

	    if Ch = DoubleQuote then
	    begin
	      if not Quoted then
	        Quoted := true
	      else if ( TextPointer + 1 )^ = DoubleQuote then
	        // escaped quote "" - step over the second one, stay quoted
	        inc( TextPointer )
	      else
	        Quoted := false;
	    end;

	    if not Overflowed then
	    begin
	      if Length( TagText ) >= 200 then
	        // too long; keep scanning so the whole tag is still consumed
	        Overflowed := true
	      else
	        TagText := TagText + Ch;
	    end;

	    inc( TextPointer );
	  end;
	end;
	313
	// Reads a tag backwards. TextPointer must be on the closing '>' when
	// this is called; the scan walks towards TextStart until it meets an
	// unquoted '<' and leaves TextPointer on that '<'.
	//
	// The result is ttInvalid when the scan passes TextStart without
	// finding a '<' (TextPointer is then one position before TextStart) or
	// when the tag is longer than 200 characters.
	Function ExtractPreviousTag( const TextStart: PChar;
	                             Var TextPointer: PChar ): TTag;
	var
	  Ch: Char;
	  Overflowed: boolean;
	  Quoted: boolean;
	begin
	  TagText := '';
	  Overflowed := false;
	  Quoted := false;

	  while true do
	  begin
	    dec( TextPointer );
	    if TextPointer < TextStart then
	    begin
	      // went past the start of the text while still inside the tag
	      Result.TagType := ttInvalid;
	      exit;
	    end;

	    Ch := TextPointer^;

	    if ( Ch = '<' ) and ( not Quoted ) then
	    begin
	      // opening bracket: the whole tag has been collected
	      if Overflowed then
	        Result.TagType := ttInvalid
	      else
	        ParseTag( TagText, Result );
	      exit;
	    end;

	    if Ch = DoubleQuote then
	    begin
	      if not Quoted then
	        Quoted := true
	      else if TextPointer <= TextStart then
	        // at the very start of the text; nothing earlier to inspect
	        Quoted := false
	      else if ( TextPointer - 1 )^ = DoubleQuote then
	        // escaped quote "" - step over the other one, stay quoted
	        dec( TextPointer )
	      else
	        Quoted := false;
	    end;

	    if not Overflowed then
	    begin
	      if Length( TagText ) >= 200 then
	        // too long; keep scanning so the whole tag is still consumed
	        Overflowed := true
	      else
	        // scanning backwards, so characters are prepended
	        TagText := Ch + TagText;
	    end;
	  end;
	end;
	385
	// Parses the single element that starts at TextPointer.
	//   NextElement - receives the position just after the element
	// Result.Character is always the first char of the element.
	// Result.Tag is only assigned when the element is a tag.
	//
	//   space          -> teWordBreak
	//   LF, CR, CR+LF  -> teLineBreak (CR+LF is consumed as one element)
	//   #0             -> teTextEnd
	//   "<<"           -> teText (literal '<')
	//   '<' otherwise  -> a tag: teImage for an image tag, else teStyle
	//   '>' or ">>"    -> teText (literal '>')
	//   anything else  -> teText
	function ExtractNextTextElement( TextPointer: PChar;
	                                 Var NextElement: PChar ): TTextElement;
	var
	  Current: Char;
	begin
	  Current := TextPointer^;
	  Result.Character := Current;
	  inc( TextPointer );

	  if Current = ' ' then
	    Result.ElementType := teWordBreak

	  else if Current = #10 then
	    Result.ElementType := teLineBreak

	  else if Current = #13 then
	  begin
	    Result.ElementType := teLineBreak;
	    // swallow the LF of a CR+LF pair
	    if TextPointer^ = #10 then
	      inc( TextPointer );
	  end

	  else if Current = #0 then
	    Result.ElementType := teTextEnd

	  else if Current = '<' then
	  begin
	    if TextPointer^ = '<' then
	    begin
	      // doubled bracket: just a literal <
	      Result.ElementType := teText;
	      inc( TextPointer );
	    end
	    else
	    begin
	      Result.Tag := ExtractTag( TextPointer );
	      if Result.Tag.TagType = ttImage then
	        Result.ElementType := teImage
	      else
	        Result.ElementType := teStyle;
	    end;
	  end

	  else if Current = '>' then
	  begin
	    // should be doubled; a single one is accepted as well
	    Result.ElementType := teText;
	    if TextPointer^ = '>' then
	      inc( TextPointer );
	  end

	  // '-': hyphen as teWrapChar (ALT) - currently disabled

	  else
	    Result.ElementType := teText;

	  NextElement := TextPointer;
	end;
	457
	// Parses the single element that ends immediately before TextPointer.
	//   TextStart   - start of the text; nothing before it is ever read
	//   NextElement - receives the position where the element starts
	//                 (TextStart when there is nothing left to read)
	// Result.Character is the last char of the element, or #0 when the
	// start of the text has been reached (teTextEnd).
	// Result.Tag is only assigned when the element is a tag.
	//
	//   space          -> teWordBreak
	//   LF, CR, CR+LF  -> teLineBreak (CR+LF is consumed as one element)
	//   ">>"           -> teText (literal '>')
	//   '>' otherwise  -> a tag: teImage for an image tag, else teStyle
	//   '<' or "<<"    -> teText (literal '<')
	//   anything else  -> teText
	function ExtractPreviousTextElement( const TextStart: PChar;
	                                     TextPointer: PChar;
	                                     Var NextElement: PChar ): TTextElement;
	var
	  TheChar: Char;
	  FoundTag: boolean;
	begin
	  // Test the bounds BEFORE stepping back, so the byte in front of the
	  // text is never dereferenced, and always report a position so that
	  // callers do not pick up an unassigned NextElement.
	  if TextPointer <= TextStart then
	  begin
	    Result.ElementType := teTextEnd;
	    Result.Character := #0;
	    NextElement := TextStart;
	    exit;
	  end;

	  dec( TextPointer );
	  TheChar := TextPointer^;
	  Result.Character := TheChar;

	  case TheChar of
	    ' ': // ---- Space (word break) found ----
	      Result.ElementType := teWordBreak;

	    #10, #13: // ---- End of line found ----
	    begin
	      Result.ElementType := teLineBreak;
	      // skip a CR preceding the LF, but only if there is a char there
	      if ( TheChar = #10 ) and ( TextPointer > TextStart ) then
	        if ( TextPointer - 1 )^ = #13 then
	          dec( TextPointer );
	    end;

	    '>': // ---- tag found ----
	    begin
	      FoundTag := true;
	      if TextPointer > TextStart then
	      begin
	        if ( TextPointer - 1 )^ = '>' then
	        begin
	          // no. just a literal >
	          FoundTag := false;
	          Result.ElementType := teText;
	          dec( TextPointer );
	        end;
	      end;

	      if FoundTag then
	      begin
	        Result.Tag := ExtractPreviousTag( TextStart, TextPointer );
	        if Result.Tag.TagType = ttImage then
	          Result.ElementType := teImage
	        else
	          Result.ElementType := teStyle;

	        // An unterminated tag leaves the pointer one position before
	        // the text; never hand that position back to the caller.
	        if TextPointer < TextStart then
	          TextPointer := TextStart;
	      end;
	    end;

	    '<': // should be double
	    begin
	      Result.ElementType := teText;
	      // Look at the char BEFORE this one. (Testing TextPointer^ itself
	      // is always true here and swallowed an extra byte after a lone <.)
	      if TextPointer > TextStart then
	        if ( TextPointer - 1 )^ = '<' then
	          dec( TextPointer );
	    end;

	    // '-': hyphen as teWrapChar (ALT) - currently disabled

	    else
	      Result.ElementType := teText;
	  end;

	  NextElement := TextPointer;
	end;
	542
	// Translates the argument of a color tag.
	//   ColorParam - either '#' followed by a hex value, or one of the
	//                names listed in StandardColors
	//   Color      - receives the color; untouched when the result is false
	// Returns true if ColorParam could be translated.
	function GetTagColor( const ColorParam: string;
	                      var Color: TColor ): boolean;
	var
	  Idx: longint;
	begin
	  Result := false;

	  if ColorParam = '' then
	    exit;

	  if ColorParam[ 1 ] = '#' then
	  begin
	    // numeric form: an unparsable value simply leaves Result false
	    try
	      Color := HexToInt( StrRightFrom( ColorParam, 2 ) );
	      Result := true;
	    except
	    end;
	    exit;
	  end;

	  // named form: first matching table entry wins
	  Idx := 0;
	  while ( Idx <= High( StandardColors ) ) and ( not Result ) do
	  begin
	    if StringsSame( ColorParam, StandardColors[ Idx ].Name ) then
	    begin
	      Color := StandardColors[ Idx ].Color;
	      Result := true;
	    end;
	    inc( Idx );
	  end;
	end;
	573
	// Translates the argument of an align tag ('left', 'center' or
	// 'right', compared with StringsSame). Any other value yields Default.
	function GetTagTextAlignment( const AlignParam: string;
	                              const Default: TTextAlignment ): TTextAlignment;
	begin
	  Result := Default;

	  if StringsSame( AlignParam, 'left' ) then
	    Result := taLeft
	  else if StringsSame( AlignParam, 'center' ) then
	    Result := taCenter
	  else if StringsSame( AlignParam, 'right' ) then
	    Result := taRight;
	end;
	586
	// Translates the argument of a wrap tag: wrapping is on only when the
	// argument is 'yes' (compared with StringsSame).
	function GetTagTextWrap( const WrapParam: string ): boolean;
	begin
	  if StringsSame( WrapParam, 'yes' ) then
	    Result := true
	  else
	    Result := false;
	end;
	591
	// Case-insensitive search for Text inside a rich text document.
	//   pRichText   - start of the rich text to search
	//   Text        - plain text to look for; an empty string always matches
	//                 at pRichText with length 0
	//   pMatch      - receives the start of the first match, or nil
	//   MatchLength - receives the number of bytes of rich text the match
	//                 covers. This can exceed Length( Text ) because tags
	//                 inside the match are skipped and "<<" / ">>" each
	//                 count as one character of Text.
	// Returns true if a match was found.
	//
	// Style tags inside a match are ignored; an image or a line break ends
	// a partial match. Case is folded per byte with a table obtained from
	// DosMapCase, so only single-byte characters are case-mapped.
	function RichTextFindString( pRichText: PChar;
	                             const Text: string;
	                             var pMatch: PChar;
	                             var MatchLength: longint ): boolean;
	var
	  P: PChar;
	  NextP: PChar;
	  Element: TTextElement;
	  pMatchStart: PChar;
	  pMatchStartNext: PChar;
	  MatchIndex: longint;

	  CountryData: COUNTRYCODE;
	  CaseMap: array[ Low( Char )..High( Char ) ] of Char;
	  C: Char;
	begin
	  if Length( Text ) = 0 then
	  begin
	    // null string always matches
	    Result := true;
	    pMatch := pRichText;
	    MatchLength := 0;
	    exit;
	  end;

	  P := pRichText;
	  pMatchStart := nil;
	  pMatchStartNext := nil;
	  MatchIndex := 1;

	  // Get case mapping of all chars (only SBCS)

	  CountryData.Country := 0;  // default country
	  CountryData.CodePage := 0; // default codepage

	  // fill array with all chars, then let the system map it in place
	  for C := Low( CaseMap ) to High( CaseMap ) do
	    CaseMap[ C ] := C;

	  DosMapCase( sizeof( CaseMap ),
	              CountryData,
	              CaseMap );

	  // Now search, case insensitively

	  while true do
	  begin
	    Element := ExtractNextTextElement( P, NextP );

	    case Element.ElementType of
	      teTextEnd:
	        // end of text
	        break;

	      teImage,
	      teLineBreak:
	        // breaks a potential match
	        MatchIndex := 1;

	      teStyle:
	        ; // ignore, matches can continue

	      else
	      begin
	        if CaseMap[ Element.Character ]
	           = CaseMap[ Text[ MatchIndex ] ] then
	        begin
	          // found a matching character
	          if MatchIndex = 1 then
	          begin
	            pMatchStart := P; // store start of match
	            pMatchStartNext := NextP;
	          end;

	          inc( MatchIndex );
	          if MatchIndex > Length( Text ) then
	          begin
	            // found a complete match
	            Result := true;
	            pMatch := pMatchStart;
	            // Measure up to the END of the last element. Using
	            // "P - start + 1" came out one byte short whenever the
	            // last matched character was an escaped "<<" or ">>".
	            MatchLength := PCharDiff( NextP, pMatchStart );
	            exit;
	          end;
	        end
	        else
	        begin
	          // not a match
	          if MatchIndex > 1 then
	          begin
	            // go back to start of match, + 1
	            NextP := pMatchStartNext;
	            MatchIndex := 1;
	          end;
	        end;
	      end;
	    end;

	    P := NextP;
	  end;

	  // no match found
	  Result := false;
	  pMatch := nil;
	  MatchLength := 0;
	end;
	697
	// Finds the start of the word to the left of pStart.
	//   pRichText - start of the rich text
	//   pStart    - position to search backwards from
	// Returns the start of the previous word, or of the current word when
	// pStart is inside one. When only whitespace/tags (or nothing at all)
	// lie between pStart and the start of the text, pRichText is returned.
	function RichTextWordLeft( pRichText: PChar;
	                           pStart: PChar ): PChar;
	Var
	  P: PChar;
	  NextP: PChar;
	  Element: TTextElement;
	begin
	  P := pStart;

	  // skip whitespace/tags until the last character of a word is found
	  repeat
	    Element := ExtractPreviousTextElement( pRichText, P, NextP );
	    if Element.ElementType = teTextEnd then
	    begin
	      // Out of text. NextP must not be used here: the parser does not
	      // necessarily assign it when it reports the end of the text, so
	      // it may be uninitialised (e.g. when pStart = pRichText).
	      Result := pRichText;
	      exit;
	    end;
	    P := NextP;
	  until not ( Element.ElementType
	              in [ teWordBreak, teLineBreak, teImage, teStyle ] );

	  // back to start of word, skip text/tags
	  while true do
	  begin
	    Element := ExtractPreviousTextElement( pRichText, P, NextP );
	    if not ( Element.ElementType in [ teText, teStyle ] ) then
	      break;
	    P := NextP;
	  end;
	  Result := P;
	end;
	732
	// Finds the start of the word to the right of pStart: first moves past
	// the rest of the current word (text and style tags), then past any
	// spaces, line breaks, images and style tags that follow it.
	// Stops at the end of the text if that is reached first.
	function RichTextWordRight( pStart: PChar ): PChar;
	Var
	  Here: PChar;
	  After: PChar;
	  Item: TTextElement;
	begin
	  Here := pStart;
	  Item := ExtractNextTextElement( Here, After );

	  // phase 1: the remainder of the current word
	  while Item.ElementType in [ teStyle, teText ] do
	  begin
	    Here := After;
	    Item := ExtractNextTextElement( Here, After );
	  end;

	  // phase 2: the gap before the next word. Item already describes the
	  // element at Here, so it can be tested directly. If it is the end of
	  // the text the loop is simply not entered.
	  while Item.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
	  begin
	    Here := After;
	    Item := ExtractNextTextElement( Here, After );
	  end;

	  Result := Here;
	end;
	761
	// Reports the word that pStart lies in.
	//   pRichText  - start of the rich text
	//   pStart     - position to examine
	//   pWordStart - receives the start of the word (nil if none)
	//   WordLength - receives the length of the word in bytes of rich
	//                text, style tags included (0 if none)
	// Returns false when the element at pStart is neither text nor a style
	// tag, i.e. pStart is not in a word.
	function RichTextWordAt( pRichText: PChar;
	                         pStart: PChar;
	                         Var pWordStart: PChar;
	                         Var WordLength: longint ): boolean;
	Var
	  Cursor: PChar;
	  Beyond: PChar;
	  Item: TTextElement;
	  pWordEnd: PChar;
	begin
	  Cursor := pStart;
	  Item := ExtractNextTextElement( Cursor, Beyond );

	  if not ( Item.ElementType in [ teStyle, teText ] ) then
	  begin
	    // not in a word.
	    pWordStart := nil;
	    WordLength := 0;
	    Result := false;
	    exit;
	  end;

	  // walk forwards to the first element that is not part of the word
	  repeat
	    Cursor := Beyond;
	    Item := ExtractNextTextElement( Cursor, Beyond );
	  until not ( Item.ElementType in [ teStyle, teText ] );
	  pWordEnd := Cursor;

	  // walk backwards from pStart over everything that is part of the word
	  Cursor := pStart;
	  Item := ExtractPreviousTextElement( pRichText, Cursor, Beyond );
	  while Item.ElementType in [ teStyle, teText ] do
	  begin
	    Cursor := Beyond;
	    Item := ExtractPreviousTextElement( pRichText, Cursor, Beyond );
	  end;

	  pWordStart := Cursor;
	  WordLength := PCharDiff( pWordEnd, pWordStart );
	  Result := true;
	end;
	801
	// Copies the plain text of the rich text in [ StartP, EndP ) to Buffer.
	//   StartP, EndP - range of rich text to convert
	//   Buffer       - destination; may be nil, in which case nothing is
	//                  written and the characters are only counted
	//   BufferLength - capacity of Buffer in characters. A negative value
	//                  is effectively ignored (no limit is applied).
	// Returns the number of characters copied (or counted).
	//
	// Text characters and spaces are copied as they are, every line break
	// is written as CR LF, and tags and images produce no output.
	// Conversion also stops at the #0 terminator if that comes before EndP.
	function CopyPlainTextToBuffer( StartP: PChar;
	                                EndP: PChar;
	                                Buffer: PChar;
	                                BufferLength: longint ): longint;
	var
	  Q: PChar;
	  EndQ: PChar;
	  P: PChar;
	  NextP: PChar;
	  Element: TTextElement;
	begin
	  // A real buffer with no room: copy nothing. The end-of-buffer test
	  // below only runs after a character has been stored, so without this
	  // guard a zero-length buffer was overrun without limit.
	  // (A nil buffer with length 0 still just counts, as before.)
	  if ( Buffer <> nil ) and ( BufferLength = 0 ) then
	  begin
	    Result := 0;
	    exit;
	  end;

	  P := StartP;
	  Q := Buffer;
	  EndQ := Buffer + BufferLength;

	  while P < EndP do
	  begin
	    Element := ExtractNextTextElement( P, NextP );
	    case Element.ElementType of
	      teTextEnd:
	        // terminator reached before EndP; never parse beyond it
	        break;

	      teText, teWordBreak:
	      begin
	        // copy char
	        if Buffer <> nil then
	          Q[ 0 ] := Element.Character;
	        inc( Q );
	      end;

	      teLineBreak:
	      begin
	        if Buffer <> nil then
	          Q[ 0 ] := #13;
	        inc( Q );
	        if Q = EndQ then
	          // end of buffer
	          break;

	        if Buffer <> nil then
	          Q[ 0 ] := #10;
	        inc( Q );
	      end;
	    end;

	    if Q = EndQ then
	      // end of buffer
	      break;

	    P := NextP;
	  end;
	  Result := PCharDiff( Q, Buffer );
	end;
	852
[418]	853	// ALT begins
	854	//
	// Check for special text element types that depend on context.
	//   Character   - the byte the parser has just classified
	//   ElementType - in: the parser's classification; out: possibly
	//                 replaced by teWrapChar, teLeadByte or teSecondByte
	//   InsideDBC   - parsing state carried from one call to the next:
	//                 true when the previous byte was a DBCS lead byte
	//   Codepage    - codepage of the text; nothing is changed unless it
	//                 is one of the Asian codepages listed below
	procedure CheckSpecialElementType( const Character: Char;
	                                   var ElementType: TTextElementType;
	                                   var InsideDBC: Boolean;
	                                   const Codepage: LongInt );
	var
	  CharByte: Byte;
	  Handled: boolean;
	begin
	  // Codepage numbers are larger than 255 and therefore cannot be
	  // members of a Pascal set, so the test is made with a case statement
	  // rather than with "in [ ... ]".
	  Handled := false;
	  case Codepage of
	    874, 932, 936, 942, 943, 949, 950, 1381, 1386:
	      Handled := true;
	  end;
	  if not Handled then
	    exit;

	  CharByte := ord( Character );

	  if InsideDBC then
	  begin
	    InsideDBC := false;
	    // sanity check for corrupt text sequence (definitely not foolproof)
	    if IsDBCSSecondByte( CharByte, Codepage ) then
	      ElementType := teSecondByte;
	    // Otherwise the lead byte was not followed by a valid second byte.
	    // The parser's own classification is then kept as it is: forcing
	    // it to teText would turn a word break, line break, tag or - worst
	    // of all - the end-of-text marker into ordinary text.
	  end
	  else
	  begin
	    if IsAsianWrapChar( CharByte, Codepage ) then
	    begin
	      ElementType := teWrapChar;
	      InsideDBC := false;
	    end
	    else if IsDBCSLeadByte( CharByte, Codepage ) then
	    begin
	      ElementType := teLeadByte;
	      InsideDBC := true;
	    end;
	  end;
	end;
	891
	// Tells whether a single-byte character is a legal place to wrap under
	// certain Asian codepages. In practice this only matters for Thai and
	// for Japanese half-width katakana; the other DBCS languages use
	// double-byte characters for all of their native glyphs.
	//
	// Bytes below $80 never qualify. Under Thai (874) every byte from $80
	// up qualifies; under Japanese (932, 942, 943) only $A2, $A6 and
	// $B1..$DD do. All other codepages give false.
	function IsAsianWrapChar( const CharByte: Byte;
	                          const Codepage: LongInt ): boolean;
	begin
	  Result := false;

	  if CharByte >= $80 then
	    case Codepage of
	      874:           // Thai
	        Result := true;
	      932, 942, 943: // Japanese
	        Result := CharByte in [ $A2, $A6, $B1..$DD ];
	    end;
	end;
	913
	// Tells whether a byte is the lead byte of a double-byte character
	// under the given codepage. Knowing this is essential because:
	// - nothing (line break, tag, etc) may ever be inserted between a lead
	//   byte and the byte that follows it;
	// - the cursor must never be placed between the two bytes;
	// - the selection state must never change between the two bytes.
	//
	// Codepages that are not listed always give false.
	function IsDBCSLeadByte( const CharByte: Byte;
	                         const Codepage: LongInt ): boolean;
	begin
	  case Codepage of
	    932, 942, 943:  // Japanese
	      Result := CharByte in [ $81..$9F, $E0..$FC ];
	    949:            // Korean KSC
	      Result := CharByte in [ $85..$FE ];
	    1381:           // Chinese GB2312
	      Result := CharByte in [ $8C..$FE ];
	    936, 950, 1386: // Chinese BIG-5 or GBK
	      Result := CharByte in [ $81..$FE ];
	    else
	      Result := false;
	  end;
	end;
	941
	// Tells whether a byte is acceptable as the second byte of a
	// double-byte character. A true result does not prove that the byte IS
	// a second byte, only that it COULD be one; the caller is expected to
	// know whether it is currently inside a double-byte character.
	//
	// Codepage 1381 accepts $A1 and above, the other listed codepages
	// accept $40 and above, and unlisted codepages always give false.
	function IsDBCSSecondByte( const CharByte: Byte;
	                           const Codepage: LongInt ): boolean;
	begin
	  case Codepage of
	    1381:
	      Result := CharByte >= $A1;
	    932, 936, 942, 943, 949, 950, 1386:
	      Result := CharByte >= $40;
	    else
	      Result := false;
	  end;
	end;
	960
	// Given a position in the text, check whether it falls in the middle of
	// a double-byte character; if it does, move back by one so that the
	// position is immediately in front of that character instead.
	//   TextPointer - text that RowStart and Index are byte offsets into
	//   Index       - in/out: position to check; decremented if adjusted
	//   Offset      - in/out: decremented together with Index; nothing is
	//                 done unless it is greater than zero on entry
	//   RowStart    - byte offset of the point to start parsing from
	//   Codepage    - nothing is done unless this is one of the DBCS
	//                 codepages listed below
	procedure MoveToCharacterBoundary( TextPointer: PChar;
	                                   var Index: LongInt;
	                                   var Offset: LongInt;
	                                   RowStart: LongInt;
	                                   Codepage: LongInt );
	var
	  P: PChar;
	  NextP: PChar;
	  Element: TTextElement;
	  InsideDBC: boolean;
	  IsDBCSCodepage: boolean;
	begin
	  if Offset <= 0 then
	    exit;

	  // Codepage numbers are larger than 255 and therefore cannot be
	  // members of a Pascal set, so the test is made with a case statement
	  // rather than with "in [ ... ]".
	  IsDBCSCodepage := false;
	  case Codepage of
	    932, 936, 942, 943, 949, 950, 1381, 1386:
	      IsDBCSCodepage := true;
	  end;
	  if not IsDBCSCodepage then
	    exit;

	  P := TextPointer + RowStart;
	  InsideDBC := false;

	  // Because parsing of byte types is state based, we must verify every
	  // byte's type from the beginning of the line until we reach the target.
	  while RowStart < Index do
	  begin
	    Element := ExtractNextTextElement( P, NextP );
	    if Element.ElementType = teTextEnd then
	      // terminator reached before Index; never parse beyond it
	      break;

	    CheckSpecialElementType( Element.Character,
	                             Element.ElementType,
	                             InsideDBC,
	                             Codepage );

	    // Advance by the number of BYTES the element occupied. A tag, an
	    // escaped "<<" / ">>" or a CR+LF pair is longer than one byte, and
	    // counting such an element as 1 let P run ahead of the byte offset
	    // that is being compared with Index.
	    inc( RowStart, PCharDiff( NextP, P ) );
	    P := NextP;
	  end;

	  if InsideDBC then
	  begin
	    // The byte just before the target was a lead byte, so the target is
	    // the second byte of a double-byte character: move back by one.
	    dec( Index );
	    dec( Offset );
	  end;
	end;
	1006	//
	1007	// ALT ends
	1008
[15]	1009	Initialization
	1010	End.

Note: See TracBrowser for help on using the repository browser.

Download in other formats: