Context Navigation

source: branches/2.20_branch/Components/RichTextDocumentUnit.pas@ 442

Last change on this file since 442 was 214, checked in by RBRi, 18 years ago
using StringUtilsUnit
Property svn:eol-style set to `native`
File size: 18.7 KB

Line
1	Unit RichTextDocumentUnit;
2
3	// Declarations of tags, and parsing functions
4
5	Interface
6
7	uses
8	Classes;
9
10	type
11	TTagType = ( ttInvalid,
12	ttBold, ttBoldOff,
13	ttItalic, ttItalicOff,
14	ttUnderline, ttUnderlineOff,
15	ttFixedWidthOn, ttFixedWidthOff,
16	ttHeading1, ttHeading2, ttHeading3, ttHeadingOff,
17	ttColor, ttColorOff,
18	ttBackgroundColor, ttBackgroundColorOff,
19	ttRed, ttBlue, ttGreen, ttBlack,
20	ttWrap,
21	ttAlign,
22	ttBeginLink, ttEndLink,
23	ttSetLeftMargin, ttSetRightMargin,
24	ttImage,
25	ttFont, ttFontOff,
26	ttEnd );
27
28	TStandardColor = record
29	Name: string[ 32 ];
30	Color: TColor;
31	end;
32
33	TTag = record
34	TagType: TTagType;
35	Arguments: string;
36	end;
37
38	TTextElementType = ( teText, // a character
39	teWordBreak,
40	teLineBreak, // end of para
41	teTextEnd,
42	teImage,
43	teStyle );
44
45	TTextElement = record
46	ElementType: TTextElementType;
47	Character: Char;
48	Tag: TTag;
49	end;
50
51	TTextAlignment = ( taLeft,
52	taRight,
53	taCenter );
54
55	// Returns tag pointed to by TextPointer and
56	// moves TextPointer to the first char after the tag.
57	Function ExtractTag( Var TextPointer: PChar ): TTag;
58
59	// Returns tag ending at TextPointer
60	// (Expects textpointer is currently pointing at the >)
61	// and moves TextPointer to the first char of the tag
62	Function ExtractPreviousTag( const TextStart: PChar;
63	Var TextPointer: PChar ): TTag;
64
65	function ExtractNextTextElement( TextPointer: PChar;
66	Var NextElement: PChar ): TTextElement;
67
68	function ExtractPreviousTextElement( const TextStart: PChar;
69	TextPointer: PChar;
70	Var NextElement: PChar ): TTextElement;
71
72	// Parse a color name or value (#hexval). Returns true if valid
73	function GetTagColor( const ColorParam: string;
74	var Color: TColor ): boolean;
75
76	function GetTagTextAlignment( const AlignParam: string;
77	const Default: TTextAlignment ): TTextAlignment;
78
79	function GetTagTextWrap( const WrapParam: string ): boolean;
80
81	// Search within a rich text document for the given text
82	// if found, returns true, pMatch is set to the first match,
83	// and MatchLength returns the length of the match
84	// (which may be greater than the length of Text due to
85	// to skipping tags)
86	// if not found, returns false, pMatch is set to nil
87	function RichTextFindString( pRichText: PChar;
88	const Text: string;
89	var pMatch: PChar;
90	var MatchLength: longint ): boolean;
91
92	// Returns the start of the previous word,
93	// or the current word if pStart is in the middle of the word
94	function RichTextWordLeft( pRichText: PChar;
95	pStart: PChar ): PChar;
96
97	// Returns the start of the next word.
98	function RichTextWordRight( pStart: PChar ): PChar;
99
100	// If pStart is in the middle of a word, then
101	// returns true and sets the start and length of the word
102	function RichTextWordAt( pRichText: PChar;
103	pStart: PChar;
104	Var pWordStart: PChar;
105	Var WordLength: longint ): boolean;
106
107	// Copies plaintext of richtext starting at StartP
108	// to the given buffer. Returns number of characters copied.
109	// Buffer may be nil
110	// If BufferLength is negative, it is effectively ignored
111	function CopyPlainTextToBuffer( StartP: PChar;
112	EndP: PChar;
113	Buffer: PChar;
114	BufferLength: longint ): longint;
115
116	Implementation
117
118	uses
119	BseDOS, // for NLS/case mapping
120	SysUtils,
121	CharUtilsUnit,
122	StringUtilsUnit;
123
124	const
125	TagStr: array[ ttInvalid .. ttEnd ] of string =
126	(
127	'', //
128	'b',
129	'/b',
130	'i',
131	'/i',
132	'u',
133	'/u',
134	'tt',
135	'/tt',
136	'h1',
137	'h2',
138	'h3',
139	'/h',
140	'color',
141	'/color',
142	'backcolor',
143	'/backcolor',
144	'red',
145	'blue',
146	'green',
147	'black',
148	'wrap',
149	'align',
150	'link',
151	'/link',
152	'leftmargin',
153	'rightmargin',
154	'image',
155	'font',
156	'/font',
157	''
158	);
159
160	StandardColors: array[ 0..7 ] of TStandardColor =
161	(
162	( Name : 'white' ; Color: clWhite ),
163	( Name : 'black' ; Color: clBlack ),
164	( Name : 'red' ; Color: clRed ),
165	( Name : 'blue' ; Color: clBlue ),
166	( Name : 'green' ; Color: clLime ),
167	( Name : 'purple'; Color: clFuchsia ),
168	( Name : 'yellow'; Color: clYellow ),
169	( Name : 'cyan' ; Color: clAqua )
170	);
171
172	Procedure ParseTag( const Text: string;
173	Var Tag: TTag );
174	var
175	TagType: TTagType;
176	TagTypeText: string;
177	SpacePos: longint;
178	begin
179	SpacePos := Pos( ' ', Text );
180	if SpacePos <> 0 then
181	begin
182	Tag.Arguments := trim( Copy( Text, SpacePos + 1, 255 ) );
183	TagTypeText := LowerCase( Copy( Text, 1, SpacePos - 1 ) );
184	end
185	else
186	begin
187	Tag.Arguments := ''; // to save time copying when not needed
188	TagTypeText := LowerCase( Text );
189	end;
190
191	for TagType := ttBold to ttEnd do
192	begin
193	if TagStr[ TagType ] = TagTypeText then
194	begin
195	Tag.TagType := TagType;
196	exit;
197	end;
198	end;
199
200	// not found
201	Tag.TagType := ttInvalid;
202	end;
203
204	var
205	TagText: string;
206	TagArgText: string;
207
208	Function ExtractTag( Var TextPointer: PChar ): TTag;
209	var
210	CurrentChar: Char;
211	TagTooLong: boolean;
212	InQuote: boolean;
213	begin
214	// assert( TextPointer[ 0 ] = '<' );
215	TagText := '';
216	TagTooLong := false;
217	InQuote := false;
218
219	repeat
220	CurrentChar := TextPointer^;
221
222	if ( CurrentChar = '>' )
223	and ( not InQuote ) then
224	begin
225	// found tag end.
226	if TagTooLong then
227	Result.TagType := ttInvalid
228	else
229	ParseTag( TagText, Result );
230	inc( TextPointer );
231	exit;
232	end;
233
234	if CurrentChar = #0 then
235	begin
236	// if we reach here we have reached the end of text
237	// during a tag. invalid tag.
238	Result.TagType := ttInvalid;
239	exit;
240	end;
241
242	if CurrentChar = CharDoubleQuote then
243	begin
244	if not InQuote then
245	begin
246	InQuote := true
247	end
248	else
249	begin
250	// Could be escaped quote ""
251	if ( TextPointer + 1 ) ^ = CharDoubleQuote then
252	begin
253	// yes it is
254	inc( TextPointer ); // skip second one
255	end
256	else
257	begin
258	// no, not an escaped quote
259	InQuote := false;
260	end;
261	end;
262
263	end;
264
265	if not TagTooLong then
266	if Length( TagText ) < 200 then
267	TagText := TagText + CurrentChar
268	else
269	TagTooLong := true; // but keep going until the end
270
271	inc( TextPointer );
272	until false;
273
274	end;
275
276	// Expects textpointer is currently pointing at the >
277	Function ExtractPreviousTag( const TextStart: PChar;
278	Var TextPointer: PChar ): TTag;
279	var
280	CurrentChar: Char;
281	TagTooLong: boolean;
282	InQuote: boolean;
283	begin
284	TagText := '';
285	TagTooLong := false;
286	InQuote := false;
287
288	repeat
289	dec( TextPointer );
290	if TextPointer < TextStart then
291	begin
292	// if we reach here we have reached the end of text
293	// during a tag. invalid tag.
294	Result.TagType := ttInvalid;
295	exit;
296	end;
297	CurrentChar := TextPointer^;
298
299	if ( CurrentChar = '<' )
300	and ( not InQuote ) then
301	begin
302	// found tag end.
303	if TagTooLong then
304	Result.TagType := ttInvalid
305	else
306	ParseTag( TagText, Result );
307	exit;
308	end;
309
310	if CurrentChar = CharDoubleQuote then
311	begin
312	if not InQuote then
313	begin
314	InQuote := true
315	end
316	else
317	begin
318	// Could be escaped quote ""
319	if TextPointer <= TextStart then
320	begin
321	// start of text... somethin weird
322	InQuote := false;
323	end
324	else if ( TextPointer - 1 ) ^ = CharDoubleQuote then
325	begin
326	// yes it is
327	dec( TextPointer ); // skip second one
328	end
329	else
330	begin
331	// no, not an escaped quote
332	InQuote := false;
333	end;
334	end;
335
336	end;
337
338	if not TagTooLong then
339	if Length( TagText ) < 200 then
340	TagText := CurrentChar + TagText
341	else
342	TagTooLong := true; // but keep going until the end
343
344	until false;
345
346	end;
347
348	function ExtractNextTextElement( TextPointer: PChar;
349	Var NextElement: PChar ): TTextElement;
350	var
351	TheChar: Char;
352	NextChar: char;
353	begin
354	with Result do
355	begin
356	TheChar := TextPointer^;
357	Character := TheChar;
358	inc( TextPointer );
359
360	case TheChar of
361	' ': // ---- Space (word break) found ----
362	ElementType := teWordBreak;
363
364	#10, #13: // ---- End of line found ----
365	begin
366	ElementType := teLineBreak;
367	if TheChar = #13 then
368	begin
369	TheChar := TextPointer^;
370	if TheChar = #10 then
371	// skip CR following LF
372	inc( TextPointer );
373	end;
374	end;
375
376	#0: // ---- end of text found ----
377	ElementType := teTextEnd;
378
379	'<': // ---- tag found? ----
380	begin
381	NextChar := TextPointer^;
382	if NextChar = '<' then
383	begin
384	// no. just a literal <
385	ElementType := teText;
386	inc( TextPointer );
387	end
388	else
389	begin
390	Tag := ExtractTag( TextPointer );
391	if Tag.TagType = ttImage then
392	ElementType := teImage
393	else
394	ElementType := teStyle;
395	end;
396
397	end;
398
399	'>': // check - should be double
400	begin
401	ElementType := teText;
402	NextChar := TextPointer^;
403	if NextChar = '>' then
404	inc( TextPointer );
405	end;
406
407	else
408	ElementType := teText;
409	end;
410	end; // with
411	NextElement := TextPointer;
412	end;
413
414	function ExtractPreviousTextElement( const TextStart: PChar;
415	TextPointer: PChar;
416	Var NextElement: PChar ): TTextElement;
417	var
418	TheChar: Char;
419	PreviousChar: Char;
420	FoundTag: boolean;
421	begin
422	with Result do
423	begin
424	dec( TextPointer );
425	TheChar := TextPointer^;
426	Character := TheChar;
427	if TextPointer < TextStart then
428	begin
429	ElementType := teTextEnd;
430	exit;
431	end;
432
433	case TheChar of
434	' ': // ---- Space (word break) found ----
435	ElementType := teWordBreak;
436
437	#10, #13: // ---- End of line found ----
438	begin
439	ElementType := teLineBreak;
440	if TheChar = #10 then
441	begin
442	dec( TextPointer );
443	TheChar := TextPointer^;
444	if TheChar = #13 then
445	begin
446	// skip CR preceeding LF
447	end
448	else
449	inc( TextPointer );
450	end;
451	end;
452
453	'>': // ---- tag found ----
454	begin
455	FoundTag := true;
456	if TextPointer > TextStart then
457	begin
458	PreviousChar := ( TextPointer - 1 )^;
459	if PreviousChar = '>' then
460	begin
461	// no. just a literal >
462	FoundTag := false;
463	ElementType := teText;
464	dec( TextPointer );
465	end
466	end;
467
468	if FoundTag then
469	begin
470	Tag := ExtractPreviousTag( TextStart, TextPointer );
471	if Tag.TagType = ttImage then
472	ElementType := teImage
473	else
474	ElementType := teStyle;
475	end;
476	end;
477
478	'<': // should be double
479	begin
480	ElementType := teText;
481	if TextPointer > TextStart then
482	begin
483	PreviousChar := TextPointer^;
484	if PreviousChar = '<' then
485	dec( TextPointer );
486	end;
487	end
488	else
489	ElementType := teText;
490	end;
491	end; // with
492	NextElement := TextPointer;
493	end;
494
495	function GetTagColor( const ColorParam: string;
496	var Color: TColor ): boolean;
497	var
498	ColorIndex: longint;
499	begin
500	Result := false;
501	if ColorParam <> '' then
502	begin
503	if ColorParam[ 1 ] = '#' then
504	begin
505	try
506	Color := HexStrToLongInt( StrSubstringFrom( ColorParam, 2 ) );
507	Result := true;
508	except
509	end;
510	end
511	else
512	begin
513	for ColorIndex := 0 to High( StandardColors ) do
514	begin
515	if StrEqualIgnoringCase( ColorParam, StandardColors[ ColorIndex ].Name ) then
516	begin
517	Color := StandardColors[ ColorIndex ].Color;
518	Result := true;
519	break;
520	end;
521	end;
522	end;
523	end;
524	end;
525
526	function GetTagTextAlignment( const AlignParam: string;
527	const Default: TTextAlignment ): TTextAlignment;
528	begin
529	if StrEqualIgnoringCase( AlignParam, 'left' ) then
530	Result := taLeft
531	else if StrEqualIgnoringCase( AlignParam, 'center' ) then
532	Result := taCenter
533	else if StrEqualIgnoringCase( AlignParam, 'right' ) then
534	Result := taRight
535	else
536	Result := Default;
537	end;
538
539	function GetTagTextWrap( const WrapParam: string ): boolean;
540	begin
541	Result := StrEqualIgnoringCase( WrapParam, 'yes' );
542	end;
543
544	function RichTextFindString( pRichText: PChar;
545	const Text: string;
546	var pMatch: PChar;
547	var MatchLength: longint ): boolean;
548	var
549	P: PChar;
550	NextP: PChar;
551	Element: TTextElement;
552	pMatchStart: pchar;
553	pMatchStartNext: pchar;
554	MatchIndex: longint;
555
556	CountryData: COUNTRYCODE;
557	CaseMap: array[ Low( Char )..High( Char ) ] of char;
558	C: Char;
559	begin
560	if Length( Text ) = 0 then
561	begin
562	// null string always matches
563	Result := true;
564	pMatch := pRichText;
565	MatchLength := 0;
566	exit;
567	end;
568
569	P := pRichText;
570
571	MatchIndex := 1;
572
573	// Get case mapping of all chars (only SBCS)
574
575	CountryData.Country := 0; // default country
576	CountryData.CodePage := 0; // default codepage
577
578	// fill array with all chars
579	for C := Low( CaseMap ) to High( CaseMap ) do
580	CaseMap[ C ] := C;
581
582	DosMapCase( sizeof( CaseMap ),
583	CountryData,
584	CaseMap );
585
586	// Now search, case insensitively
587
588	while true do
589	begin
590	Element := ExtractNextTextElement( P, NextP );
591
592	case Element.ElementType of
593	teTextEnd:
594	// end of text
595	break;
596
597	teImage,
598	teLineBreak:
599	// breaks a potential match
600	MatchIndex := 1;
601
602	teStyle:
603	; // ignore, matches can continue
604
605	else
606	begin
607	if CaseMap[ Element.Character ]
608	= CaseMap[ Text[ MatchIndex ] ] then
609	begin
610	// found a match
611	if MatchIndex = 1 then
612	begin
613	pMatchStart := P; // store start of match
614	pMatchStartNext := NextP;
615	end;
616
617	inc( MatchIndex );
618	if MatchIndex > Length( Text ) then
619	begin
620	// found a complete match
621	Result := true;
622	pMatch := pMatchStart;
623	MatchLength := PCharPointerDiff( P, pMatchStart )
624	+ 1; // include this char
625	exit;
626	end;
627	end
628	else
629	begin
630	// not a match
631	if MatchIndex > 1 then
632	begin
633	// go back to start of match, + 1
634	NextP := pMatchStartNext;
635	MatchIndex := 1;
636	end;
637	end;
638	end;
639	end;
640
641	P := NextP;
642	end;
643
644	// no match found
645	Result := false;
646	pMatch := nil;
647	MatchLength := 0;
648	end;
649
650	function RichTextWordLeft( pRichText: PChar;
651	pStart: PChar ): PChar;
652	Var
653	P: PChar;
654	NextP: PChar;
655	Element: TTextElement;
656	begin
657	P := pStart;
658
659	// skip whitespace/tags...
660	Element := ExtractPreviousTextElement( pRichText, P, NextP );
661	P := NextP;
662	while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
663	begin
664	Element := ExtractPreviousTextElement( pRichText, P, NextP );
665	P := NextP;
666	end;
667	if Element.ElementType = teTextEnd then
668	begin
669	Result := P;
670	// out of text
671	exit;
672	end;
673
674	// back to start of word, skip text/tags
675	while true do
676	begin
677	Element := ExtractPreviousTextElement( pRichText, P, NextP );
678	if not ( Element.ElementType in [ teText, teStyle ] ) then
679	break;
680	P := NextP;
681	end;
682	Result := P;
683	end;
684
685	function RichTextWordRight( pStart: PChar ): PChar;
686	Var
687	P: PChar;
688	NextP: PChar;
689	Element: TTextElement;
690	begin
691	P := pStart;
692
693	// skip text/tags...
694	Element := ExtractNextTextElement( P, NextP );
695	while Element.ElementType in [ teStyle, teText ] do
696	begin
697	P := NextP;
698	Element := ExtractNextTextElement( P, NextP );
699	end;
700	if Element.ElementType <> teTextEnd then
701	begin
702	// skip whitespace
703	Element := ExtractNextTextElement( P, NextP );
704	while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
705	begin
706	P := NextP;
707	Element := ExtractNextTextElement( P, NextP );
708	end;
709	end;
710
711	Result := P;
712	end;
713
714	function RichTextWordAt( pRichText: PChar;
715	pStart: PChar;
716	Var pWordStart: PChar;
717	Var WordLength: longint ): boolean;
718	Var
719	P: PChar;
720	NextP: PChar;
721	Element: TTextElement;
722	pWordEnd: PChar;
723	begin
724	P := pStart;
725	Element := ExtractNextTextElement( P, NextP );
726	if not ( Element.ElementType in [ teStyle, teText ] ) then
727	begin
728	// not in a word.
729	result := false;
730	pWordStart := nil;
731	WordLength := 0;
732	exit;
733	end;
734	// find end of the word
735	while Element.ElementType in [ teStyle, teText ] do
736	begin
737	P := NextP;
738	Element := ExtractNextTextElement( P, NextP );
739	end;
740	pWordEnd := P;
741
742	P := pStart;
743	Element := ExtractPreviousTextElement( pRichText, P, NextP );
744	while Element.ElementType in [ teStyle, teText ] do
745	begin
746	P := NextP;
747	Element := ExtractPreviousTextElement( pRichText, P, NextP );
748	end;
749	pWordStart := P;
750	WordLength := PCharPointerDiff( pWordEnd, pWordStart );
751	Result := true;
752	end;
753
754	function CopyPlainTextToBuffer( StartP: PChar;
755	EndP: PChar;
756	Buffer: PChar;
757	BufferLength: longint ): longint;
758	var
759	Q: PChar;
760	EndQ: Pchar;
761	P: PChar;
762	NextP: PChar;
763	Element: TTextElement;
764	begin
765	P := StartP;
766	Q := Buffer;
767	EndQ := Buffer + BufferLength;
768
769	while P < EndP do
770	begin
771	Element := ExtractNextTextElement( P, NextP );
772	case Element.ElementType of
773	teText, teWordBreak:
774	begin
775	// copy char
776	if Buffer <> nil then
777	Q[ 0 ] := Element.Character;
778	inc( Q );
779	end;
780
781	teLineBreak:
782	begin
783	if Buffer <> nil then
784	Q[ 0 ] := #13;
785	inc( Q );
786	if Q = EndQ then
787	// end of buffer
788	break;
789
790	if Buffer <> nil then
791	Q[ 0 ] := #10;
792	inc( Q );
793	end;
794	end;
795
796	if Q = EndQ then
797	// end of buffer
798	break;
799
800	P := NextP;
801	end;
802	result := PCharPointerDiff( Q, Buffer );
803	end;
804
805	Initialization
806	End.

Note: See TracBrowser for help on using the repository browser.

Download in other formats: