source: branches/2.20_branch/Components/RichTextDocumentUnit.pas@ 442

Last change on this file since 442 was 214, checked in by RBRi, 18 years ago

using StringUtilsUnit

  • Property svn:eol-style set to native
File size: 18.7 KB
Line 
1Unit RichTextDocumentUnit;
2
3// Declarations of tags, and parsing functions
4
5Interface
6
7uses
8 Classes;
9
10type
11 TTagType = ( ttInvalid,
12 ttBold, ttBoldOff,
13 ttItalic, ttItalicOff,
14 ttUnderline, ttUnderlineOff,
15 ttFixedWidthOn, ttFixedWidthOff,
16 ttHeading1, ttHeading2, ttHeading3, ttHeadingOff,
17 ttColor, ttColorOff,
18 ttBackgroundColor, ttBackgroundColorOff,
19 ttRed, ttBlue, ttGreen, ttBlack,
20 ttWrap,
21 ttAlign,
22 ttBeginLink, ttEndLink,
23 ttSetLeftMargin, ttSetRightMargin,
24 ttImage,
25 ttFont, ttFontOff,
26 ttEnd );
27
28 TStandardColor = record
29 Name: string[ 32 ];
30 Color: TColor;
31 end;
32
33 TTag = record
34 TagType: TTagType;
35 Arguments: string;
36 end;
37
38 TTextElementType = ( teText, // a character
39 teWordBreak,
40 teLineBreak, // end of para
41 teTextEnd,
42 teImage,
43 teStyle );
44
45 TTextElement = record
46 ElementType: TTextElementType;
47 Character: Char;
48 Tag: TTag;
49 end;
50
51 TTextAlignment = ( taLeft,
52 taRight,
53 taCenter );
54
55// Returns tag pointed to by TextPointer and
56// moves TextPointer to the first char after the tag.
57Function ExtractTag( Var TextPointer: PChar ): TTag;
58
59// Returns tag ending at TextPointer
60// (Expects textpointer is currently pointing at the >)
61// and moves TextPointer to the first char of the tag
62Function ExtractPreviousTag( const TextStart: PChar;
63 Var TextPointer: PChar ): TTag;
64
65function ExtractNextTextElement( TextPointer: PChar;
66 Var NextElement: PChar ): TTextElement;
67
68function ExtractPreviousTextElement( const TextStart: PChar;
69 TextPointer: PChar;
70 Var NextElement: PChar ): TTextElement;
71
72// Parse a color name or value (#hexval). Returns true if valid
73function GetTagColor( const ColorParam: string;
74 var Color: TColor ): boolean;
75
76function GetTagTextAlignment( const AlignParam: string;
77 const Default: TTextAlignment ): TTextAlignment;
78
79function GetTagTextWrap( const WrapParam: string ): boolean;
80
81// Search within a rich text document for the given text
82// if found, returns true, pMatch is set to the first match,
83// and MatchLength returns the length of the match
84// (which may be greater than the length of Text due to
85// to skipping tags)
86// if not found, returns false, pMatch is set to nil
87function RichTextFindString( pRichText: PChar;
88 const Text: string;
89 var pMatch: PChar;
90 var MatchLength: longint ): boolean;
91
92// Returns the start of the previous word,
93// or the current word if pStart is in the middle of the word
94function RichTextWordLeft( pRichText: PChar;
95 pStart: PChar ): PChar;
96
97// Returns the start of the next word.
98function RichTextWordRight( pStart: PChar ): PChar;
99
100// If pStart is in the middle of a word, then
101// returns true and sets the start and length of the word
102function RichTextWordAt( pRichText: PChar;
103 pStart: PChar;
104 Var pWordStart: PChar;
105 Var WordLength: longint ): boolean;
106
107// Copies plaintext of richtext starting at StartP
108// to the given buffer. Returns number of characters copied.
109// Buffer may be nil
110// If BufferLength is negative, it is effectively ignored
111function CopyPlainTextToBuffer( StartP: PChar;
112 EndP: PChar;
113 Buffer: PChar;
114 BufferLength: longint ): longint;
115
116Implementation
117
118uses
119 BseDOS, // for NLS/case mapping
120 SysUtils,
121 CharUtilsUnit,
122 StringUtilsUnit;
123
124const
125 TagStr: array[ ttInvalid .. ttEnd ] of string =
126 (
127 '', //
128 'b',
129 '/b',
130 'i',
131 '/i',
132 'u',
133 '/u',
134 'tt',
135 '/tt',
136 'h1',
137 'h2',
138 'h3',
139 '/h',
140 'color',
141 '/color',
142 'backcolor',
143 '/backcolor',
144 'red',
145 'blue',
146 'green',
147 'black',
148 'wrap',
149 'align',
150 'link',
151 '/link',
152 'leftmargin',
153 'rightmargin',
154 'image',
155 'font',
156 '/font',
157 ''
158 );
159
160 StandardColors: array[ 0..7 ] of TStandardColor =
161 (
162 ( Name : 'white' ; Color: clWhite ),
163 ( Name : 'black' ; Color: clBlack ),
164 ( Name : 'red' ; Color: clRed ),
165 ( Name : 'blue' ; Color: clBlue ),
166 ( Name : 'green' ; Color: clLime ),
167 ( Name : 'purple'; Color: clFuchsia ),
168 ( Name : 'yellow'; Color: clYellow ),
169 ( Name : 'cyan' ; Color: clAqua )
170 );
171
172Procedure ParseTag( const Text: string;
173 Var Tag: TTag );
174var
175 TagType: TTagType;
176 TagTypeText: string;
177 SpacePos: longint;
178begin
179 SpacePos := Pos( ' ', Text );
180 if SpacePos <> 0 then
181 begin
182 Tag.Arguments := trim( Copy( Text, SpacePos + 1, 255 ) );
183 TagTypeText := LowerCase( Copy( Text, 1, SpacePos - 1 ) );
184 end
185 else
186 begin
187 Tag.Arguments := ''; // to save time copying when not needed
188 TagTypeText := LowerCase( Text );
189 end;
190
191 for TagType := ttBold to ttEnd do
192 begin
193 if TagStr[ TagType ] = TagTypeText then
194 begin
195 Tag.TagType := TagType;
196 exit;
197 end;
198 end;
199
200 // not found
201 Tag.TagType := ttInvalid;
202end;
203
204var
205 TagText: string;
206 TagArgText: string;
207
208Function ExtractTag( Var TextPointer: PChar ): TTag;
209var
210 CurrentChar: Char;
211 TagTooLong: boolean;
212 InQuote: boolean;
213begin
214// assert( TextPointer[ 0 ] = '<' );
215 TagText := '';
216 TagTooLong := false;
217 InQuote := false;
218
219 repeat
220 CurrentChar := TextPointer^;
221
222 if ( CurrentChar = '>' )
223 and ( not InQuote ) then
224 begin
225 // found tag end.
226 if TagTooLong then
227 Result.TagType := ttInvalid
228 else
229 ParseTag( TagText, Result );
230 inc( TextPointer );
231 exit;
232 end;
233
234 if CurrentChar = #0 then
235 begin
236 // if we reach here we have reached the end of text
237 // during a tag. invalid tag.
238 Result.TagType := ttInvalid;
239 exit;
240 end;
241
242 if CurrentChar = CharDoubleQuote then
243 begin
244 if not InQuote then
245 begin
246 InQuote := true
247 end
248 else
249 begin
250 // Could be escaped quote ""
251 if ( TextPointer + 1 ) ^ = CharDoubleQuote then
252 begin
253 // yes it is
254 inc( TextPointer ); // skip second one
255 end
256 else
257 begin
258 // no, not an escaped quote
259 InQuote := false;
260 end;
261 end;
262
263 end;
264
265 if not TagTooLong then
266 if Length( TagText ) < 200 then
267 TagText := TagText + CurrentChar
268 else
269 TagTooLong := true; // but keep going until the end
270
271 inc( TextPointer );
272 until false;
273
274end;
275
276// Expects textpointer is currently pointing at the >
277Function ExtractPreviousTag( const TextStart: PChar;
278 Var TextPointer: PChar ): TTag;
279var
280 CurrentChar: Char;
281 TagTooLong: boolean;
282 InQuote: boolean;
283begin
284 TagText := '';
285 TagTooLong := false;
286 InQuote := false;
287
288 repeat
289 dec( TextPointer );
290 if TextPointer < TextStart then
291 begin
292 // if we reach here we have reached the end of text
293 // during a tag. invalid tag.
294 Result.TagType := ttInvalid;
295 exit;
296 end;
297 CurrentChar := TextPointer^;
298
299 if ( CurrentChar = '<' )
300 and ( not InQuote ) then
301 begin
302 // found tag end.
303 if TagTooLong then
304 Result.TagType := ttInvalid
305 else
306 ParseTag( TagText, Result );
307 exit;
308 end;
309
310 if CurrentChar = CharDoubleQuote then
311 begin
312 if not InQuote then
313 begin
314 InQuote := true
315 end
316 else
317 begin
318 // Could be escaped quote ""
319 if TextPointer <= TextStart then
320 begin
321 // start of text... somethin weird
322 InQuote := false;
323 end
324 else if ( TextPointer - 1 ) ^ = CharDoubleQuote then
325 begin
326 // yes it is
327 dec( TextPointer ); // skip second one
328 end
329 else
330 begin
331 // no, not an escaped quote
332 InQuote := false;
333 end;
334 end;
335
336 end;
337
338 if not TagTooLong then
339 if Length( TagText ) < 200 then
340 TagText := CurrentChar + TagText
341 else
342 TagTooLong := true; // but keep going until the end
343
344 until false;
345
346end;
347
348function ExtractNextTextElement( TextPointer: PChar;
349 Var NextElement: PChar ): TTextElement;
350var
351 TheChar: Char;
352 NextChar: char;
353begin
354 with Result do
355 begin
356 TheChar := TextPointer^;
357 Character := TheChar;
358 inc( TextPointer );
359
360 case TheChar of
361 ' ': // ---- Space (word break) found ----
362 ElementType := teWordBreak;
363
364 #10, #13: // ---- End of line found ----
365 begin
366 ElementType := teLineBreak;
367 if TheChar = #13 then
368 begin
369 TheChar := TextPointer^;
370 if TheChar = #10 then
371 // skip CR following LF
372 inc( TextPointer );
373 end;
374 end;
375
376 #0: // ---- end of text found ----
377 ElementType := teTextEnd;
378
379 '<': // ---- tag found? ----
380 begin
381 NextChar := TextPointer^;
382 if NextChar = '<' then
383 begin
384 // no. just a literal <
385 ElementType := teText;
386 inc( TextPointer );
387 end
388 else
389 begin
390 Tag := ExtractTag( TextPointer );
391 if Tag.TagType = ttImage then
392 ElementType := teImage
393 else
394 ElementType := teStyle;
395 end;
396
397 end;
398
399 '>': // check - should be double
400 begin
401 ElementType := teText;
402 NextChar := TextPointer^;
403 if NextChar = '>' then
404 inc( TextPointer );
405 end;
406
407 else
408 ElementType := teText;
409 end;
410 end; // with
411 NextElement := TextPointer;
412end;
413
414function ExtractPreviousTextElement( const TextStart: PChar;
415 TextPointer: PChar;
416 Var NextElement: PChar ): TTextElement;
417var
418 TheChar: Char;
419 PreviousChar: Char;
420 FoundTag: boolean;
421begin
422 with Result do
423 begin
424 dec( TextPointer );
425 TheChar := TextPointer^;
426 Character := TheChar;
427 if TextPointer < TextStart then
428 begin
429 ElementType := teTextEnd;
430 exit;
431 end;
432
433 case TheChar of
434 ' ': // ---- Space (word break) found ----
435 ElementType := teWordBreak;
436
437 #10, #13: // ---- End of line found ----
438 begin
439 ElementType := teLineBreak;
440 if TheChar = #10 then
441 begin
442 dec( TextPointer );
443 TheChar := TextPointer^;
444 if TheChar = #13 then
445 begin
446 // skip CR preceeding LF
447 end
448 else
449 inc( TextPointer );
450 end;
451 end;
452
453 '>': // ---- tag found ----
454 begin
455 FoundTag := true;
456 if TextPointer > TextStart then
457 begin
458 PreviousChar := ( TextPointer - 1 )^;
459 if PreviousChar = '>' then
460 begin
461 // no. just a literal >
462 FoundTag := false;
463 ElementType := teText;
464 dec( TextPointer );
465 end
466 end;
467
468 if FoundTag then
469 begin
470 Tag := ExtractPreviousTag( TextStart, TextPointer );
471 if Tag.TagType = ttImage then
472 ElementType := teImage
473 else
474 ElementType := teStyle;
475 end;
476 end;
477
478 '<': // should be double
479 begin
480 ElementType := teText;
481 if TextPointer > TextStart then
482 begin
483 PreviousChar := TextPointer^;
484 if PreviousChar = '<' then
485 dec( TextPointer );
486 end;
487 end
488 else
489 ElementType := teText;
490 end;
491 end; // with
492 NextElement := TextPointer;
493end;
494
495function GetTagColor( const ColorParam: string;
496 var Color: TColor ): boolean;
497var
498 ColorIndex: longint;
499begin
500 Result := false;
501 if ColorParam <> '' then
502 begin
503 if ColorParam[ 1 ] = '#' then
504 begin
505 try
506 Color := HexStrToLongInt( StrSubstringFrom( ColorParam, 2 ) );
507 Result := true;
508 except
509 end;
510 end
511 else
512 begin
513 for ColorIndex := 0 to High( StandardColors ) do
514 begin
515 if StrEqualIgnoringCase( ColorParam, StandardColors[ ColorIndex ].Name ) then
516 begin
517 Color := StandardColors[ ColorIndex ].Color;
518 Result := true;
519 break;
520 end;
521 end;
522 end;
523 end;
524end;
525
526function GetTagTextAlignment( const AlignParam: string;
527 const Default: TTextAlignment ): TTextAlignment;
528begin
529 if StrEqualIgnoringCase( AlignParam, 'left' ) then
530 Result := taLeft
531 else if StrEqualIgnoringCase( AlignParam, 'center' ) then
532 Result := taCenter
533 else if StrEqualIgnoringCase( AlignParam, 'right' ) then
534 Result := taRight
535 else
536 Result := Default;
537end;
538
539function GetTagTextWrap( const WrapParam: string ): boolean;
540begin
541 Result := StrEqualIgnoringCase( WrapParam, 'yes' );
542end;
543
544function RichTextFindString( pRichText: PChar;
545 const Text: string;
546 var pMatch: PChar;
547 var MatchLength: longint ): boolean;
548var
549 P: PChar;
550 NextP: PChar;
551 Element: TTextElement;
552 pMatchStart: pchar;
553 pMatchStartNext: pchar;
554 MatchIndex: longint;
555
556 CountryData: COUNTRYCODE;
557 CaseMap: array[ Low( Char )..High( Char ) ] of char;
558 C: Char;
559begin
560 if Length( Text ) = 0 then
561 begin
562 // null string always matches
563 Result := true;
564 pMatch := pRichText;
565 MatchLength := 0;
566 exit;
567 end;
568
569 P := pRichText;
570
571 MatchIndex := 1;
572
573 // Get case mapping of all chars (only SBCS)
574
575 CountryData.Country := 0; // default country
576 CountryData.CodePage := 0; // default codepage
577
578 // fill array with all chars
579 for C := Low( CaseMap ) to High( CaseMap ) do
580 CaseMap[ C ] := C;
581
582 DosMapCase( sizeof( CaseMap ),
583 CountryData,
584 CaseMap );
585
586 // Now search, case insensitively
587
588 while true do
589 begin
590 Element := ExtractNextTextElement( P, NextP );
591
592 case Element.ElementType of
593 teTextEnd:
594 // end of text
595 break;
596
597 teImage,
598 teLineBreak:
599 // breaks a potential match
600 MatchIndex := 1;
601
602 teStyle:
603 ; // ignore, matches can continue
604
605 else
606 begin
607 if CaseMap[ Element.Character ]
608 = CaseMap[ Text[ MatchIndex ] ] then
609 begin
610 // found a match
611 if MatchIndex = 1 then
612 begin
613 pMatchStart := P; // store start of match
614 pMatchStartNext := NextP;
615 end;
616
617 inc( MatchIndex );
618 if MatchIndex > Length( Text ) then
619 begin
620 // found a complete match
621 Result := true;
622 pMatch := pMatchStart;
623 MatchLength := PCharPointerDiff( P, pMatchStart )
624 + 1; // include this char
625 exit;
626 end;
627 end
628 else
629 begin
630 // not a match
631 if MatchIndex > 1 then
632 begin
633 // go back to start of match, + 1
634 NextP := pMatchStartNext;
635 MatchIndex := 1;
636 end;
637 end;
638 end;
639 end;
640
641 P := NextP;
642 end;
643
644 // no match found
645 Result := false;
646 pMatch := nil;
647 MatchLength := 0;
648end;
649
650function RichTextWordLeft( pRichText: PChar;
651 pStart: PChar ): PChar;
652Var
653 P: PChar;
654 NextP: PChar;
655 Element: TTextElement;
656begin
657 P := pStart;
658
659 // skip whitespace/tags...
660 Element := ExtractPreviousTextElement( pRichText, P, NextP );
661 P := NextP;
662 while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
663 begin
664 Element := ExtractPreviousTextElement( pRichText, P, NextP );
665 P := NextP;
666 end;
667 if Element.ElementType = teTextEnd then
668 begin
669 Result := P;
670 // out of text
671 exit;
672 end;
673
674 // back to start of word, skip text/tags
675 while true do
676 begin
677 Element := ExtractPreviousTextElement( pRichText, P, NextP );
678 if not ( Element.ElementType in [ teText, teStyle ] ) then
679 break;
680 P := NextP;
681 end;
682 Result := P;
683end;
684
685function RichTextWordRight( pStart: PChar ): PChar;
686Var
687 P: PChar;
688 NextP: PChar;
689 Element: TTextElement;
690begin
691 P := pStart;
692
693 // skip text/tags...
694 Element := ExtractNextTextElement( P, NextP );
695 while Element.ElementType in [ teStyle, teText ] do
696 begin
697 P := NextP;
698 Element := ExtractNextTextElement( P, NextP );
699 end;
700 if Element.ElementType <> teTextEnd then
701 begin
702 // skip whitespace
703 Element := ExtractNextTextElement( P, NextP );
704 while Element.ElementType in [ teWordBreak, teLineBreak, teImage, teStyle ] do
705 begin
706 P := NextP;
707 Element := ExtractNextTextElement( P, NextP );
708 end;
709 end;
710
711 Result := P;
712end;
713
714function RichTextWordAt( pRichText: PChar;
715 pStart: PChar;
716 Var pWordStart: PChar;
717 Var WordLength: longint ): boolean;
718Var
719 P: PChar;
720 NextP: PChar;
721 Element: TTextElement;
722 pWordEnd: PChar;
723begin
724 P := pStart;
725 Element := ExtractNextTextElement( P, NextP );
726 if not ( Element.ElementType in [ teStyle, teText ] ) then
727 begin
728 // not in a word.
729 result := false;
730 pWordStart := nil;
731 WordLength := 0;
732 exit;
733 end;
734 // find end of the word
735 while Element.ElementType in [ teStyle, teText ] do
736 begin
737 P := NextP;
738 Element := ExtractNextTextElement( P, NextP );
739 end;
740 pWordEnd := P;
741
742 P := pStart;
743 Element := ExtractPreviousTextElement( pRichText, P, NextP );
744 while Element.ElementType in [ teStyle, teText ] do
745 begin
746 P := NextP;
747 Element := ExtractPreviousTextElement( pRichText, P, NextP );
748 end;
749 pWordStart := P;
750 WordLength := PCharPointerDiff( pWordEnd, pWordStart );
751 Result := true;
752end;
753
754function CopyPlainTextToBuffer( StartP: PChar;
755 EndP: PChar;
756 Buffer: PChar;
757 BufferLength: longint ): longint;
758var
759 Q: PChar;
760 EndQ: Pchar;
761 P: PChar;
762 NextP: PChar;
763 Element: TTextElement;
764begin
765 P := StartP;
766 Q := Buffer;
767 EndQ := Buffer + BufferLength;
768
769 while P < EndP do
770 begin
771 Element := ExtractNextTextElement( P, NextP );
772 case Element.ElementType of
773 teText, teWordBreak:
774 begin
775 // copy char
776 if Buffer <> nil then
777 Q[ 0 ] := Element.Character;
778 inc( Q );
779 end;
780
781 teLineBreak:
782 begin
783 if Buffer <> nil then
784 Q[ 0 ] := #13;
785 inc( Q );
786 if Q = EndQ then
787 // end of buffer
788 break;
789
790 if Buffer <> nil then
791 Q[ 0 ] := #10;
792 inc( Q );
793 end;
794 end;
795
796 if Q = EndQ then
797 // end of buffer
798 break;
799
800 P := NextP;
801 end;
802 result := PCharPointerDiff( Q, Buffer );
803end;
804
805Initialization
806End.
Note: See TracBrowser for help on using the repository browser.