Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

textv_html.c

Visit:

Last change on this file was 238, checked in by umoeller, 23 years ago
Misc fixes.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 61.8 KB

Rev	Line
[8]	1
	2	/*
	3	*@@sourcefile textv_html.c:
	4	* this code converts HTML code to escape sequences for the
	5	* XTextView control (textview.c).
	6	*
	7	* This code is in part ugly spaghetti, but this is intentional to
	8	* make this HTML parser FAST. In general, you get about double or
	9	* triple the speed compared to Netscape 4.6 on OS/2. This code
	10	* doesn't understand all of HTML though, but you get most of HTML 2.
	11	* There's no tables or frames at this point.
	12	*
	13	* The entry point into this mess is txvConvertFromHTML, which
	14	* is easy to use.
	15	*
	16	* Note: Version numbering in this file relates to XWorkplace version
	17	* numbering.
	18	*
	19	*@@header "helpers\textv_html.h"
	20	*
	21	*@@added V0.9.3 (2000-05-10) [umoeller]
	22	*/
	23
	24	/*
	25	* Copyright (C) 2000 Ulrich Möller.
	26	* This program is part of the XWorkplace package.
	27	* This program is free software; you can redistribute it and/or modify
	28	* it under the terms of the GNU General Public License as published by
	29	* the Free Software Foundation, in version 2 as it comes in the COPYING
	30	* file of the XWorkplace main distribution.
	31	* This program is distributed in the hope that it will be useful,
	32	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	33	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	34	* GNU General Public License for more details.
	35	*/
	36
	37	#define OS2EMX_PLAIN_CHAR
	38	// this is needed for "os2emx.h"; if this is defined,
	39	// emx will define PSZ as _signed_ char, otherwise
	40	// as unsigned char
	41
	42	#include <os2.h>
	43
	44	#include <stdlib.h>
	45	#include <stdio.h>
	46	#include <string.h>
	47
	48	#include "setup.h" // code generation and debugging options
	49
	50	#include "helpers\linklist.h"
	51	#include "helpers\stringh.h"
	52	#include "helpers\textview.h"
	53
	54	#include "helpers\textv_html.h"
	55
	56	/*
	57	*@@category: Helpers\PM helpers\Window classes\XTextView control\HTML conversion
[21]	58	* see textv_html.c.
[8]	59	*/
	60
	61	/* ******************************************************************
[14]	62	*
	63	* Declarations
	64	*
[8]	65	********************************************************************/
	66
	67	/*
	68	*@@ LISTDESC:
	69	* structure stored in COPYTARGET to
	70	* hold list information (UL, OL, ... tags).
	71	*
	72	*@@added V0.9.3 (2000-05-07) [umoeller]
	73	*/
	74
	75	typedef struct _LISTDESC
	76	{
	77	ULONG ulListType; // 0: unordered (UL)
	78	// 1: ordered (OL)
	79	// 2: definition lists (DL)
	80	ULONG ulItem; // list enumeration; 1 on first item,
	81	// 2 on next, ...
	82	} LISTDESC, *PLISTDESC;
	83
	84	/*
	85	*@@ COPYTARGET:
	86	* monster structure which holds the current
	87	* status of the HTML converter while conversion
	88	* is taking place. This stores input/output pointers
	89	* and various flags to avoid duplicate line breaks
	90	* and such.
	91	*
	92	* One instance of this is created in txvConvertFromHTML
	93	* on the stack and then passed to all the sub-function
	94	* calls.
	95	*
	96	*@@added V0.9.3 (2000-05-06) [umoeller]
	97	*/
	98
	99	typedef struct _COPYTARGET
	100	{
	101	PSZ pSource; // ptr into source string;
	102	// valid ONLY while we're in a tag handler
	103	PSZ pNewSource; // can be set by tag handler to skip characters;
	104	// this is set to NULL before calling a tag
	105	// handler; if this is still NULL, default
	106	// processing occurs
	107
	108	// new string:
	109	PSZ pszNew; // memory buffer
	110	ULONG cbNew; // size of buffer (reallocated)
	111	PSZ pTarget; // current char ptr into pszNew
	112
	113	// saved character while tag handler is being called
	114	CHAR cSaved;
	115
[201]	116	PSZ *ppszTitle; // out: title (ptr can be NULL)
	117	// V0.9.20 (2002-08-10) [umoeller]
[8]	118
	119	// formatting flags while going through the text
	120	BOOL fSkipNextSpace;
	121	// if TRUE, subsequent spaces are skipped
	122	BOOL fNeedsLinebreak;
	123	// if TRUE, \n is inserted before any other character
	124	BOOL fSkipNextLinebreak;
	125	// if TRUE, subsequent linebreaks are skipped
	126	BOOL fPRE;
	127	// are we currently in a PRE tag?
	128	BOOL fInLink;
	129	// are we currently in a A HREF= tag?
	130
	131	// arguments (attributes) for tag handlers
	132	PSZ pszAttributes; // != NULL while a tag handler is being called
	133	// and attributes exist for the tag
	134
	135	// anchors count
[201]	136	// USHORT usAnchorIndex; // start with 1 removed V0.9.20 (2002-08-10) [umoeller]
[8]	137
	138	// list maintenance
	139	ULONG ulListLevel; // if > 0, we're in a UL or OL block;
	140	// raised for each block
	141	ULONG ulUnorderedListLevel; // raised with each UL block to keep track
	142	// of bullets
	143	ULONG ulOrderedListLevel; // raised with each UL block to keep track
	144	// of 1), 2), a), b)... numbering
	145	ULONG ulCurrentListType; // current list type (from highest LISTDESC)
	146	BOOL fInDT; // TRUE if we're currently in a DT tag
	147	LINKLIST llLists; // stack of LISTDESC items
	148	} COPYTARGET, *PCOPYTARGET;
	149
	150	typedef VOID FNPROCESSTAG(PCOPYTARGET pct);
	151	typedef FNPROCESSTAG *PFNPROCESSTAG;
	152
	153	/* ******************************************************************
[14]	154	*
	155	* Global variables
	156	*
[8]	157	********************************************************************/
	158
	159	/* ******************************************************************
[14]	160	*
	161	* Append-char helpers
	162	*
[8]	163	********************************************************************/
	164
	165	#define COPYTARGETALLOC 100000
	166
	167	/*
	168	*@@ AppendChar:
	169	* helper for txvConvertFromHTML to
	170	* append a char to the target string
	171	* in COPYTARGET.
	172	* This performs a few additional checks
	173	* and manages memory.
	174	*
	175	*@@added V0.9.3 (2000-05-06) [umoeller]
	176	*/
	177
[222]	178	STATIC VOID AppendChar(PCOPYTARGET pct, // in/out: formatting buffer
[142]	179	unsigned char c)
[8]	180	{
	181	// calculate ofs where to store next char
	182	ULONG cbOfsNext = pct->pTarget - pct->pszNew;
	183	if (cbOfsNext >= pct->cbNew) // have we reached the buffer size yet?
	184	{
	185	// more mem needed:
	186	pct->cbNew += COPYTARGETALLOC;
	187	pct->pszNew = (PSZ)realloc(pct->pszNew, pct->cbNew);
	188	// if first call, pszNew is NULL, and realloc
	189	// behaves just like malloc
	190	// adjust target, because ptr might have changed
	191	pct->pTarget = pct->pszNew + cbOfsNext;
	192	}
	193
	194	// append character
	195	*pct->pTarget++ = c;
	196	}
	197
	198	/*
	199	*@@ AppendString:
	200	* appends the characters in *ach,
	201	* which must be null-terminated.
	202	* Does NOT append a null character though.
	203	*
	204	*@@added V0.9.3 (2000-05-06) [umoeller]
	205	*/
	206
[222]	207	STATIC VOID AppendString(PCOPYTARGET pct, // in/out: formatting buffer
[142]	208	char *ach)
[8]	209	{
	210	ULONG cbAppend = strlen(ach);
	211	ULONG ul;
	212	PSZ pSource;
	213
	214	// calculate ofs where to store next char
	215	ULONG cbOfsNext = pct->pTarget - pct->pszNew;
	216	while (cbOfsNext + cbAppend >= pct->cbNew)
	217	{
	218	// more mem needed:
	219	pct->cbNew += COPYTARGETALLOC;
	220	pct->pszNew = (PSZ)realloc(pct->pszNew, pct->cbNew);
	221	// if first call, pszNew is NULL, and realloc
	222	// behaves just like malloc
	223	// adjust target, because ptr might have changed
	224	pct->pTarget = pct->pszNew + cbOfsNext;
	225	}
	226
	227	// append characters
	228	pSource = ach;
	229	for (ul = 0;
	230	ul < cbAppend;
	231	ul++)
	232	pct->pTarget++ = pSource++;
	233	}
	234
	235	/*
	236	*@@ AppendLinebreakCheck:
	237	* checks if a linebreak is needed and
	238	* inserts one if so.
	239	*
	240	*@@added V0.9.3 (2000-05-17) [umoeller]
	241	*/
	242
[222]	243	STATIC VOID AppendLinebreakCheck(PCOPYTARGET pct)
[8]	244	{
	245	if ((!pct->fPRE) && (pct->fNeedsLinebreak))
	246	{
	247	// yes: insert linebreak; this resets pct->fNeedsLinebreak
	248	if (!pct->fSkipNextLinebreak)
	249	{
	250	AppendChar(pct, '\n');
	251
	252	if ((pct->ulListLevel) && (!pct->fInDT))
	253	// if we're in a list, add a tab also,
	254	// because we'll have a negative first-line margin
	255	AppendString(pct, TXVESC_TAB);
	256	}
	257	pct->fNeedsLinebreak = FALSE;
	258	}
	259	}
	260
	261	/*
	262	*@@ AppendEscapeWithDecimal:
	263	* appends the specified escape code
	264	* with a three-digit decimal parameter.
	265	* Calls AppendString in turn.
	266	*
	267	*@@added V0.9.3 (2000-05-07) [umoeller]
	268	*/
	269
[222]	270	STATIC VOID AppendEscapeWith3Decimals(PCOPYTARGET pct, // in/out: formatting buffer
[142]	271	char *ach,
	272	USHORT us)
[8]	273	{
	274	CHAR szDecimal[10];
	275	if (us > 999)
	276	us = 999;
	277	sprintf(szDecimal, "%03d", us);
	278	// append escape
	279	AppendString(pct, ach);
	280	AppendString(pct, szDecimal);
	281	}
	282
	283	/*
	284	*@@ AppendEscapeWith4Decimals:
	285	*
	286	*@@added V0.9.3 (2000-05-07) [umoeller]
	287	*/
	288
[222]	289	STATIC VOID AppendEscapeWith4Decimals(PCOPYTARGET pct, // in/out: formatting buffer
[142]	290	char *ach,
	291	USHORT us)
[8]	292	{
	293	CHAR szDecimal[10];
	294	if (us > 9999)
	295	us = 9999;
	296	sprintf(szDecimal, "%04d", us);
	297	// append escape
	298	AppendString(pct, ach);
	299	AppendString(pct, szDecimal);
	300	}
	301
	302	/* ******************************************************************
[14]	303	*
	304	* Tag converter functions
	305	*
[8]	306	********************************************************************/
	307
	308	/*
	309	*@@ StartList:
	310	* starts a list (UL or OL).
	311	* This uses a linked list in COPYTARGET
	312	* to keep a pseudo-stack for nested lists.
	313	*
	314	*@@added V0.9.3 (2000-05-08) [umoeller]
	315	*/
	316
[222]	317	STATIC VOID StartList(PCOPYTARGET pct, // in/out: formatting buffer
[142]	318	ULONG ulListType) // list type:
	319	// 0: unordered (UL)
	320	// 1: ordered (OL)
	321	// 2: definition lists (DL)
[8]	322	{
	323	PLISTDESC pListDesc;
	324
	325	// raise list level
	326	pct->ulListLevel++;
	327
	328	if (ulListType == 0)
	329	// unordered:
	330	pct->ulUnorderedListLevel++;
	331	else if (ulListType == 1)
	332	// ordered:
	333	pct->ulOrderedListLevel++;
	334
	335	// create LISTDESC and store it on stack
	336	pListDesc = (PLISTDESC)malloc(sizeof(LISTDESC));
	337	pListDesc->ulListType
	338	= pct->ulCurrentListType
	339	= ulListType;
	340	pListDesc->ulItem = 1;
	341
	342	lstAppendItem(&pct->llLists,
	343	pListDesc);
	344
	345	AppendEscapeWith4Decimals(pct,
	346	TXVESC_LEFTMARGIN,
	347	pct->ulListLevel * 5);
	348	AppendEscapeWith3Decimals(pct,
	349	TXVESC_FIRSTLINEMARGIN_LEFT,
	350	(ulListType == 2)
	351	? 5 // for definition lists
	352	: 3); // negative!
	353	// add \n before any other character
	354	pct->fNeedsLinebreak = TRUE;
	355	}
	356
	357	/*
	358	*@@ StopList:
	359	* stops a list (UL or OL).
	360	*
	361	*@@added V0.9.3 (2000-05-07) [umoeller]
	362	*/
	363
[222]	364	STATIC VOID StopList(PCOPYTARGET pct)
[8]	365	{
	366	if (pct->ulListLevel)
	367	{
	368	PLISTNODE pNode;
	369
	370	// lower list level
	371	pct->ulListLevel--;
	372	AppendEscapeWith4Decimals(pct,
	373	TXVESC_LEFTMARGIN,
	374	pct->ulListLevel * 5);
	375	AppendEscapeWith3Decimals(pct,
	376	TXVESC_FIRSTLINEMARGIN_LEFT,
	377	(pct->ulListLevel)
	378	? 3 // we still have a list level (nested)
	379	: 0);
	380	pct->fNeedsLinebreak = TRUE;
	381
	382	// remove the LISTDESC from the stack
	383	pNode = lstNodeFromIndex(&pct->llLists,
	384	pct->ulListLevel); // this has been lowered already
	385	if (pNode)
	386	{
	387	PLISTDESC pListDesc = (PLISTDESC)pNode->pItemData;
	388	if (pListDesc->ulListType == 0)
	389	// was unordered:
	390	pct->ulUnorderedListLevel--;
	391	else if (pListDesc->ulListType == 1)
	392	// was ordered:
	393	pct->ulOrderedListLevel--;
	394
	395	lstRemoveNode(&pct->llLists, pNode);
	396
	397	// update COPYTARGET with previous list level
	398	if (pct->ulListLevel)
	399	{
	400	// we're still in a list (nested lists):
	401	PLISTDESC pListDesc2 = (PLISTDESC)lstItemFromIndex(&pct->llLists,
	402	pct->ulListLevel - 1);
	403	if (pListDesc2)
	404	pct->ulCurrentListType = pListDesc2->ulListType;
	405	}
	406	}
	407	}
	408	// else: buggy HTML code, ignore
	409	}
	410
	411	/*
	412	*@@ TagTITLE:
	413	*
	414	*@@added V0.9.3 (2000-05-19) [umoeller]
	415	*/
	416
[222]	417	STATIC VOID TagTITLE(PCOPYTARGET pct)
[8]	418	{
	419	// pSource currently points to <TITLE tag
	420	PSZ pSource = pct->pSource + strlen(pct->pSource);
	421	// points to temporary null byte in main buffer now
	422	*pSource = pct->cSaved;
	423
[201]	424	if (pSource = strchr(pct->pSource, '>'))
[8]	425	{
[201]	426	PSZ pNextOpen;
	427	if (pNextOpen = strchr(pSource, '<'))
[8]	428	{
	429	// extract title
[201]	430	if (pct->ppszTitle)
	431	*(pct->ppszTitle) = strhSubstr(pSource + 1, pNextOpen);
	432	// adjusted V0.9.20 (2002-08-10) [umoeller]
[8]	433
	434	if (strnicmp(pNextOpen + 1, "/TITLE", 6) == 0)
	435	{
	436	// closing /TITLE tag found:
	437	// search on after that
[201]	438	if (pct->pNewSource = strchr(pNextOpen, '>'))
[8]	439	pct->pNewSource++;
	440	}
	441	}
	442	}
	443	}
	444
	445	/*
	446	*@@ TagP:
	447	*
	448	*/
	449
[222]	450	STATIC VOID TagP(PCOPYTARGET pct)
[8]	451	{
	452	// append newline:
	453	// add \n before any other character
	454	pct->fNeedsLinebreak = TRUE;
	455
	456	/* if (pct->ulListLevel)
	457	{
	458	// if we are currently in a list, we must also
	459	// add a tab escape, because we have set
	460	// the first line margin to the left of the
	461	// left margin
	462	AppendString(pct,
	463	TXVESC_TAB);
	464	} */
	465	}
	466
[222]	467	STATIC VOID TagBR(PCOPYTARGET pct)
[8]	468	{
	469	AppendChar(pct,
	470	'\r');
	471
	472	if (pct->ulListLevel)
	473	{
	474	// if we are currently in a list, we must also
	475	// add a tab escape, because we have set
	476	// the first line margin to the left of the
	477	// left margin
	478	AppendString(pct,
	479	TXVESC_TAB);
	480	}
	481	if (!pct->fPRE)
	482	pct->fSkipNextSpace = TRUE;
	483	}
	484
[222]	485	STATIC VOID TagPRE(PCOPYTARGET pct)
[8]	486	{
	487	// start of PRE tag:
	488	// add \n before any other character
	489	// pct->fNeedsLinebreak = TRUE;
	490	AppendChar(pct, '\n');
	491	pct->fNeedsLinebreak = FALSE;
	492	/* AppendString(pct,
	493	TXVESC_PRE_BEGIN); */
	494	AppendEscapeWith3Decimals(pct,
	495	TXVESC_SET_FONT,
	496	1); // monospaced font
	497	AppendEscapeWith4Decimals(pct,
	498	TXVESC_SPACEBEFORE,
	499	0); // no spacing before
	500	AppendEscapeWith4Decimals(pct,
	501	TXVESC_SPACEAFTER,
	502	0); // no spacing after
	503	// disable word-wrapping
	504	AppendString(pct,
	505	TXVESC_WORDWRAP "0");
	506	pct->fPRE = TRUE;
	507	pct->fSkipNextSpace = FALSE;
	508	}
	509
[222]	510	STATIC VOID TagXPRE(PCOPYTARGET pct)
[8]	511	{
	512	pct->fPRE = FALSE;
	513	AppendEscapeWith3Decimals(pct,
	514	TXVESC_SET_FONT,
	515	0); // standard font
	516	AppendString(pct, TXVESC_SPACEBEFORE);
	517	AppendString(pct, "####"); // reset to default
	518	AppendString(pct, TXVESC_SPACEAFTER);
	519	AppendString(pct, "####"); // reset to default
	520	// re-enable word-wrapping
	521	AppendString(pct,
	522	TXVESC_WORDWRAP "1"
	523	"\n"); // force line break
	524	pct->fNeedsLinebreak = FALSE;
	525	// refuse to add \n even if we have another "p" coming up
	526	pct->fSkipNextLinebreak = TRUE;
	527	pct->fSkipNextSpace = TRUE;
	528	}
	529
[222]	530	STATIC VOID TagH1(PCOPYTARGET pct)
[8]	531	{
	532	pct->fNeedsLinebreak = TRUE;
	533	AppendEscapeWith3Decimals(pct,
	534	TXVESC_POINTSIZE_REL,
	535	200); // double size
	536	AppendString(pct,
	537	TXVESC_BOLD_BEGIN);
	538	}
	539
[222]	540	STATIC VOID TagXH1(PCOPYTARGET pct)
[8]	541	{
	542	AppendString(pct,
	543	TXVESC_BOLD_END);
	544	AppendEscapeWith3Decimals(pct,
	545	TXVESC_POINTSIZE_REL,
	546	100); // regular size
	547	// add \n before any other character
	548	pct->fNeedsLinebreak = TRUE;
	549	}
	550
[222]	551	STATIC VOID TagH2(PCOPYTARGET pct)
[8]	552	{
	553	pct->fNeedsLinebreak = TRUE;
	554	AppendEscapeWith3Decimals(pct,
	555	TXVESC_POINTSIZE_REL,
	556	175); // size in percent of regular point size
	557	AppendString(pct,
	558	TXVESC_BOLD_BEGIN);
	559	}
	560
[222]	561	STATIC VOID TagXH2(PCOPYTARGET pct)
[8]	562	{
	563	AppendString(pct,
	564	TXVESC_BOLD_END);
	565	AppendEscapeWith3Decimals(pct,
	566	TXVESC_POINTSIZE_REL,
	567	100); // regular size
	568	// add \n before any other character
	569	pct->fNeedsLinebreak = TRUE;
	570	}
	571
[222]	572	STATIC VOID TagH3(PCOPYTARGET pct)
[8]	573	{
	574	pct->fNeedsLinebreak = TRUE;
	575	AppendEscapeWith3Decimals(pct,
	576	TXVESC_POINTSIZE_REL,
	577	150); // size in percent of regular point size
	578	AppendString(pct,
	579	TXVESC_BOLD_BEGIN);
	580	}
	581
[222]	582	STATIC VOID TagXH3(PCOPYTARGET pct)
[8]	583	{
	584	AppendString(pct,
	585	TXVESC_BOLD_END);
	586	AppendEscapeWith3Decimals(pct,
	587	TXVESC_POINTSIZE_REL,
	588	100); // size in percent of regular point size
	589	// add \n before any other character
	590	pct->fNeedsLinebreak = TRUE;
	591	}
	592
[222]	593	STATIC VOID TagH4(PCOPYTARGET pct)
[8]	594	{
	595	pct->fNeedsLinebreak = TRUE;
	596	AppendEscapeWith3Decimals(pct,
	597	TXVESC_POINTSIZE_REL,
	598	125); // size in percent of regular point size
	599	AppendString(pct,
	600	TXVESC_BOLD_BEGIN);
	601	}
	602
[222]	603	STATIC VOID TagXH4(PCOPYTARGET pct)
[8]	604	{
	605	AppendString(pct,
	606	TXVESC_BOLD_END);
	607	AppendEscapeWith3Decimals(pct,
	608	TXVESC_POINTSIZE_REL,
	609	100); // regular size
	610	// add \n before any other character
	611	pct->fNeedsLinebreak = TRUE;
	612	}
	613
[222]	614	STATIC VOID TagH5(PCOPYTARGET pct)
[8]	615	{
	616	pct->fNeedsLinebreak = TRUE;
	617	AppendEscapeWith3Decimals(pct,
	618	TXVESC_POINTSIZE_REL,
	619	100); // size in percent of regular point size
	620	AppendString(pct,
	621	TXVESC_BOLD_BEGIN);
	622	}
	623
[222]	624	STATIC VOID TagXH5(PCOPYTARGET pct)
[8]	625	{
	626	AppendString(pct,
	627	TXVESC_BOLD_END);
	628	AppendEscapeWith3Decimals(pct,
	629	TXVESC_POINTSIZE_REL,
	630	100); // regular size
	631	// add \n before any other character
	632	pct->fNeedsLinebreak = TRUE;
	633	}
	634
[222]	635	STATIC VOID TagH6(PCOPYTARGET pct)
[8]	636	{
	637	pct->fNeedsLinebreak = TRUE;
	638	AppendEscapeWith3Decimals(pct,
	639	TXVESC_POINTSIZE_REL,
	640	80 ); // size in percent of regular point size
	641	AppendString(pct,
	642	TXVESC_BOLD_BEGIN);
	643	}
	644
[222]	645	STATIC VOID TagXH6(PCOPYTARGET pct)
[8]	646	{
	647	AppendString(pct,
	648	TXVESC_BOLD_END);
	649	AppendEscapeWith3Decimals(pct,
	650	TXVESC_POINTSIZE_REL,
	651	100); // regular size
	652	// add \n before any other character
	653	pct->fNeedsLinebreak = TRUE;
	654	}
	655
[222]	656	STATIC VOID TagUL(PCOPYTARGET pct)
[8]	657	{
	658	StartList(pct,
	659	0); // unordered
	660	}
	661
[222]	662	STATIC VOID TagXUL(PCOPYTARGET pct)
[8]	663	{
	664	StopList(pct);
	665	}
	666
[222]	667	STATIC VOID TagOL(PCOPYTARGET pct)
[8]	668	{
	669	StartList(pct,
	670	1); // ordered
	671	}
	672
[222]	673	STATIC VOID TagXOL(PCOPYTARGET pct)
[8]	674	{
	675	StopList(pct);
	676	}
	677
[222]	678	STATIC VOID TagLI(PCOPYTARGET pct)
[8]	679	{
	680	PLISTDESC pListDesc;
	681	CHAR szMarker[20] = TXVESC_MARKER "\x01";
	682
	683	if (pct->ulListLevel)
	684	{
	685	// we're in a list:
	686	pListDesc = (PLISTDESC)lstItemFromIndex(&pct->llLists,
	687	pct->ulListLevel - 1);
	688	if (pListDesc)
[13]	689	{
[8]	690	if (pListDesc->ulListType == 1)
	691	// is ordered list:
[13]	692	sprintf(szMarker, "%lu.", (pListDesc->ulItem)++);
[8]	693	else if (pListDesc->ulListType == 0)
	694	// is unordered list:
	695	// set bullet type according to unordered nesting
	696	szMarker[2] = pct->ulUnorderedListLevel;
[13]	697	}
[8]	698	}
	699
	700	// add \n before any other character
	701	// pct->fNeedsLinebreak = TRUE;
	702	// if (pct->fNeedsLinebreak)
	703	{
	704	AppendChar(pct, '\n');
	705	pct->fNeedsLinebreak = FALSE;
	706	}
	707
	708	AppendString(pct, szMarker);
	709	AppendString(pct, TXVESC_TAB);
	710	}
	711
[222]	712	STATIC VOID TagDL(PCOPYTARGET pct)
[8]	713	{
	714	StartList(pct,
	715	2); // definition list
	716	}
	717
[222]	718	STATIC VOID TagXDL(PCOPYTARGET pct)
[8]	719	{
	720	StopList(pct);
	721	pct->fInDT = FALSE;
	722	}
	723
[222]	724	STATIC VOID TagDT(PCOPYTARGET pct)
[8]	725	{
	726	pct->fNeedsLinebreak = TRUE;
	727	pct->fInDT = TRUE;
	728	}
	729
[222]	730	STATIC VOID TagDD(PCOPYTARGET pct)
[8]	731	{
	732	pct->fNeedsLinebreak = TRUE;
	733	AppendString(pct, TXVESC_TAB);
	734	if (!pct->fPRE)
	735	pct->fSkipNextSpace = TRUE;
	736	pct->fInDT = FALSE;
	737	}
	738
[222]	739	STATIC VOID TagTR(PCOPYTARGET pct)
[8]	740	{
	741	pct->fNeedsLinebreak = TRUE;
	742	}
	743
[222]	744	STATIC VOID TagB(PCOPYTARGET pct)
[8]	745	{
	746	AppendString(pct,
	747	TXVESC_BOLD_BEGIN);
	748	}
	749
[222]	750	STATIC VOID TagXB(PCOPYTARGET pct)
[8]	751	{
	752	AppendString(pct,
	753	TXVESC_BOLD_END);
	754	}
	755
[222]	756	STATIC VOID TagI(PCOPYTARGET pct)
[8]	757	{
	758	AppendString(pct,
	759	TXVESC_ITALICS_BEGIN);
	760	}
	761
[222]	762	STATIC VOID TagXI(PCOPYTARGET pct)
[8]	763	{
	764	AppendString(pct,
	765	TXVESC_ITALICS_END);
	766	}
	767
[222]	768	STATIC VOID TagU(PCOPYTARGET pct)
[8]	769	{
	770	AppendString(pct,
	771	TXVESC_UNDERLINE_BEGIN);
	772	}
	773
[222]	774	STATIC VOID TagXU(PCOPYTARGET pct)
[8]	775	{
	776	AppendString(pct,
	777	TXVESC_UNDERLINE_END);
	778	}
	779
[222]	780	STATIC VOID TagSTRIKE(PCOPYTARGET pct)
[8]	781	{
	782	AppendString(pct,
	783	TXVESC_STRIKE_BEGIN);
	784	}
	785
[222]	786	STATIC VOID TagXSTRIKE(PCOPYTARGET pct)
[8]	787	{
	788	AppendString(pct,
	789	TXVESC_STRIKE_END);
	790	}
	791
[222]	792	STATIC VOID TagCODE(PCOPYTARGET pct)
[8]	793	{
	794	AppendEscapeWith3Decimals(pct,
	795	TXVESC_SET_FONT,
	796	1); // monospaced font
	797	}
	798
[222]	799	STATIC VOID TagXCODE(PCOPYTARGET pct)
[8]	800	{
	801	AppendEscapeWith3Decimals(pct,
	802	TXVESC_SET_FONT,
	803	0); // regular font
	804	}
	805
[222]	806	STATIC VOID TagA(PCOPYTARGET pct)
[8]	807	{
	808	CHAR szAnchor[10];
[201]	809	PSZ pHREF = NULL;
[8]	810
	811	pct->fInLink = FALSE;
	812
[201]	813	if (pct->pszAttributes)
[8]	814	{
	815	// we have attributes:
[201]	816	PSZ pszClosingTag;
	817	if (pszClosingTag = strchr(pct->pszAttributes, '>'))
[8]	818	{
	819	ULONG ulOfs = 0;
	820
	821	/*
	822	* HREF attribute:
	823	*
	824	*/
	825
[201]	826	PSZ pNAME = 0;
[8]	827
	828	// replace '>' with null char to mark end of search
	829	*pszClosingTag = 0;
	830
[201]	831	if (pHREF = strhGetTextAttr(pct->pszAttributes, "HREF", &ulOfs))
[8]	832	// OK, we got a link target:
	833	pct->fInLink = TRUE;
	834	// do not free
	835
	836	/*
	837	* NAME attribute:
	838	*
	839	*/
	840
[201]	841	if (pNAME = strhGetTextAttr(pct->pszAttributes, "NAME", &ulOfs))
[8]	842	{
	843	AppendString(pct,
	844	TXVESC_ANCHORNAME);
	845	AppendString(pct,
	846	pNAME);
	847	// must be terminated with 0xFF
	848	AppendChar(pct, 0xFF);
	849	free(pNAME);
	850	}
[201]	851
[8]	852	// restore '>'
	853	*pszClosingTag = '>';
	854	}
	855	}
	856
[201]	857	if (pHREF)
[8]	858	{
	859	AppendString(pct,
[201]	860	TXVESC_LINK_BEGIN);
[8]	861	AppendString(pct,
[201]	862	pHREF);
	863	// must be terminated with 0xFF
	864	AppendChar(pct, 0xFF);
	865
	866	free(pHREF);
[8]	867	}
	868	}
	869
[222]	870	STATIC VOID TagXA(PCOPYTARGET pct)
[8]	871	{
	872	if (pct->fInLink)
	873	{
	874	AppendString(pct,
[201]	875	TXVESC_LINK_END);
[8]	876	pct->fInLink = FALSE;
	877	}
	878	}
	879
	880	/* ******************************************************************
[14]	881	*
	882	* Tag helpers
	883	*
[8]	884	********************************************************************/
	885
	886	/*
	887	*@@ FindTagProcessor:
	888	* returns the Tag* function which handles the
	889	* given tag or NULL if there's none.
	890	*
	891	*@@added V0.9.4 (2000-06-10) [umoeller]
	892	*/
	893
[222]	894	STATIC PFNPROCESSTAG FindTagProcessor(PSZ pszTag)
[8]	895	{
	896	PFNPROCESSTAG pProcessor = NULL;
	897
	898	CHAR c0,
	899	c1;
	900
	901	BOOL fEndOfTag = FALSE;
	902
	903	PSZ pCheck = pszTag,
	904	p2;
	905	if (*pCheck == '/')
	906	{
	907	// end of tag:
	908	fEndOfTag = TRUE;
	909	pCheck++;
	910	}
	911
	912	c0 = *pCheck;
	913	c1 = *(pCheck + 1);
	914
	915	p2 = pCheck + 2;
	916
	917	switch (c0)
	918	{
	919	case 'A':
	920	case 'a':
	921	switch (c1)
	922	{
	923	case 0: // A
	924	if (!fEndOfTag)
	925	return TagA;
	926	else
	927	return TagXA;
	928	case 'D': // ADDRESS
	929	case 'd': // ADDRESS
	930	if (stricmp(p2, "DRESS") == 0)
[13]	931	{
[8]	932	if (!fEndOfTag)
	933	return TagI;
	934	else
	935	return TagXI;
[13]	936	}
[8]	937	}
	938	break;
	939
	940	case 'B':
	941	case 'b':
	942	switch (c1)
	943	{
	944	case 0:
	945	if (!fEndOfTag)
	946	return TagB;
	947	else
	948	return TagXB;
	949
	950	case 'R': // BR
	951	case 'r': // BR
	952	if (*p2 == 0)
	953	if (!fEndOfTag)
	954	return TagBR;
	955	}
	956	break;
	957
	958	case 'C':
	959	case 'c':
	960	switch (c1)
	961	{
	962	case 'I': // CITE
	963	case 'i': // CITE
	964	if (stricmp(p2, "TE") == 0)
	965	{
	966	if (!fEndOfTag)
	967	return TagI;
	968	else
	969	return TagXI;
	970	}
	971	break;
	972
	973	case 'O':
	974	case 'o':
	975	if (stricmp(p2, "DE") == 0)
[13]	976	{
[8]	977	if (!fEndOfTag)
	978	return TagCODE;
	979	else
	980	return TagXCODE;
[13]	981	}
[8]	982	break;
	983	}
	984	break;
	985
	986	case 'D':
	987	case 'd':
	988	switch (c1)
	989	{
	990	case 'D': // DD
	991	case 'd': // DD
	992	if ((*p2 == 0) && (!fEndOfTag))
[238]	993	return TagDD;
[8]	994	break;
	995
	996	case 'I': // DIR
	997	case 'i': // DIR
	998	if (*p2 == 'R')
	999	if (*(pCheck + 3) == 0)
[13]	1000	{
[8]	1001	if (!fEndOfTag)
	1002	return TagUL;
	1003	else
	1004	return TagXUL;
[13]	1005	}
[8]	1006	break;
	1007
	1008	case 'L': // DL
	1009	case 'l': // DL
	1010	if (*p2 == 0)
[13]	1011	{
[8]	1012	if (!fEndOfTag)
	1013	return TagDL;
	1014	else
	1015	return TagXDL;
[13]	1016	}
[8]	1017	break;
	1018
	1019	case 'T': // DT
	1020	case 't': // DT
	1021	if ((*p2 == 0) && (!fEndOfTag))
	1022	return TagDT;
	1023	break;
	1024	}
	1025	break;
	1026
	1027	case 'E':
	1028	case 'e':
	1029	if ( (c1 == 'M') \|\| (c1 == 'm') ) // EM
	1030	if (*p2 == 0)
[13]	1031	{
[8]	1032	if (!fEndOfTag)
	1033	return TagI;
	1034	else
	1035	return TagXI;
[13]	1036	}
[8]	1037	break;
	1038
	1039	case 'H':
	1040	case 'h':
	1041	if (c1)
	1042	if (*p2 == 0)
	1043	switch (c1)
	1044	{
	1045	case '1':
	1046	if (!fEndOfTag)
	1047	return TagH1;
	1048	else
	1049	return TagXH1;
	1050	case '2':
	1051	if (!fEndOfTag)
	1052	return TagH2;
	1053	else
	1054	return TagXH2;
	1055	case '3':
	1056	if (!fEndOfTag)
	1057	return TagH3;
	1058	else
	1059	return TagXH3;
	1060	case '4':
	1061	if (!fEndOfTag)
	1062	return TagH4;
	1063	else
	1064	return TagXH4;
	1065	case '5':
	1066	if (!fEndOfTag)
	1067	return TagH5;
	1068	else
	1069	return TagXH5;
	1070	case '6':
	1071	if (!fEndOfTag)
	1072	return TagH6;
	1073	else
	1074	return TagXH6;
	1075	}
	1076	break;
	1077
	1078	case 'I':
	1079	case 'i':
	1080	if (c1 == 0)
[13]	1081	{
[8]	1082	if (!fEndOfTag)
	1083	return TagI;
	1084	else
	1085	return TagXI;
[13]	1086	}
[8]	1087	break;
	1088
	1089	case 'L':
	1090	case 'l':
	1091	if ((c1 == 'I') \|\| (c1 == 'i'))
	1092	if (*p2 == 0)
	1093	return TagLI;
	1094	break;
	1095
	1096	case 'M':
	1097	case 'm':
	1098	if (stricmp(p2, "NU") == 0)
[13]	1099	{
[8]	1100	if (!fEndOfTag)
	1101	return TagUL;
	1102	else
	1103	return TagXUL;
[13]	1104	}
[8]	1105	break;
	1106
	1107	case 'O':
	1108	case 'o':
	1109	if ((c1 == 'L') \|\| (c1 == 'l'))
	1110	if (*p2 == 0)
[13]	1111	{
[8]	1112	if (!fEndOfTag)
	1113	return TagOL;
	1114	else
	1115	return TagXOL;
[13]	1116	}
[8]	1117	break;
	1118
	1119	case 'P':
	1120	case 'p':
	1121	switch (c1)
	1122	{
	1123	case 0:
	1124	if (!fEndOfTag)
	1125	return TagP;
	1126	break;
	1127
	1128	case 'R': // PRE
	1129	case 'r': // PRE
	1130	if ((p2 == 'E') \|\| (p2 == 'e'))
	1131	if (*(pCheck + 3) == 0)
[13]	1132	{
[8]	1133	if (!fEndOfTag)
	1134	return TagPRE;
	1135	else
	1136	return TagXPRE;
[13]	1137	}
[8]	1138	break;
	1139	}
	1140	break;
	1141
	1142	case 'S':
	1143	case 's':
	1144	switch (c1)
	1145	{
	1146	case 'T': // STRONG
	1147	case 't': // STRONG
	1148	if (stricmp(p2, "RONG") == 0)
[13]	1149	{
[8]	1150	if (!fEndOfTag)
	1151	return TagB;
	1152	else
	1153	return TagXB;
[13]	1154	}
[8]	1155	else if (stricmp(p2, "RIKE") == 0)
[13]	1156	{
[8]	1157	if (!fEndOfTag)
	1158	return TagSTRIKE;
	1159	else
	1160	return TagXSTRIKE;
[13]	1161	}
[8]	1162	break;
	1163
	1164	case 'A':
	1165	case 'a':
	1166	if (stricmp(p2, "MP") == 0)
[13]	1167	{
[8]	1168	if (!fEndOfTag)
	1169	return TagCODE;
	1170	else
	1171	return TagXCODE;
[13]	1172	}
[8]	1173	break;
	1174	}
	1175	break;
	1176
	1177	case 'T':
	1178	case 't':
	1179	switch (c1)
	1180	{
	1181	case 'R':
	1182	case 'r':
	1183	if (*p2 == 0)
	1184	return TagTR;
	1185	break;
	1186
	1187	case 'I':
	1188	case 'i':
	1189	if (stricmp(p2, "TLE") == 0)
	1190	return TagTITLE;
	1191	break;
	1192
	1193	case 'T': // TT
	1194	case 't':
	1195	if (*p2 == 0)
[13]	1196	{
[8]	1197	if (!fEndOfTag)
	1198	return TagCODE;
	1199	else
	1200	return TagXCODE;
[13]	1201	}
[8]	1202	break;
	1203	}
	1204	break;
	1205
	1206	case 'U':
	1207	case 'u':
	1208	switch (c1)
	1209	{
	1210	case 0:
	1211	if (!fEndOfTag)
	1212	return TagU;
	1213	else
	1214	return TagXU;
	1215
	1216	case 'L':
	1217	case 'l':
	1218	if (*p2 == 0)
[13]	1219	{
[8]	1220	if (!fEndOfTag)
	1221	return TagUL;
	1222	else
	1223	return TagXUL;
[13]	1224	}
[8]	1225	break;
	1226	}
	1227	break;
	1228
	1229	case 'V':
	1230	case 'v':
	1231	if (stricmp(p2, "R") == 0)
	1232	{
	1233	if (!fEndOfTag)
	1234	return TagI;
	1235	else
	1236	return TagXI;
	1237	}
	1238	break;
	1239
	1240	case 'X':
	1241	case 'x':
	1242	if (stricmp(p2, "MP") == 0) // XMP
	1243	{
	1244	if (!fEndOfTag)
	1245	return TagPRE;
	1246	else
	1247	return TagXPRE;
	1248	}
	1249	break;
	1250	}
	1251
[238]	1252	return pProcessor;
[8]	1253	}
	1254
	1255	/*
	1256	*@@ HandleTag:
	1257	* called by txvConvertFromHTML when a "<" character
	1258	* is found in the source buffer. This calls
	1259	* FindTagProcessor in turn to find the Tag*
	1260	* function which handles the tag.
	1261	*
	1262	*@@added V0.9.3 (2000-05-18) [umoeller]
	1263	*/
	1264
[222]	1265	STATIC VOID HandleTag(PCOPYTARGET pct)
[8]	1266	{
	1267	PSZ pStartOfTag = pct->pSource;
	1268	// '<' == begin of tag:
	1269
	1270	// is it a comment? <!-- ... -->
	1271	if (strncmp(pStartOfTag + 1, "!--", 3) == 0)
	1272	{
	1273	// start of comment:
	1274	// find end of comment
	1275	PSZ pEnd = strstr(pStartOfTag, "-->");
	1276	if (pEnd)
	1277	// found:
	1278	// search on after end of comment
	1279	pct->pSource = pEnd + 3;
	1280	else
	1281	{
	1282	// end of comment not found:
	1283	// stop formatting...
	1284	pct->pSource++;
	1285	return;
	1286	}
	1287	}
	1288	else
	1289	{
	1290	// no comment:
	1291	// find end of tag
	1292	PSZ p2 = pStartOfTag + 1,
	1293	pNextClose = 0, // receives first '>' after '<'
	1294	pNextSpace = 0; // receives first ' ' after '<'
	1295	BOOL fCont = TRUE;
	1296	while (fCont)
	1297	{
	1298	switch (*p2)
	1299	{
	1300	case ' ':
	1301	case '\r':
	1302	case '\n':
	1303	// store first space after '<'
	1304	if (!pNextSpace)
	1305	pNextSpace = p2;
	1306	// overwrite line breaks with spaces;
	1307	// otherwise we cannot handle tags which go across
	1308	// several lines, which is valid HTML
	1309	*p2 = ' ';
	1310	break;
	1311
	1312	case '>': // end of tag found:
	1313	pNextClose = p2;
	1314	fCont = FALSE;
	1315	break;
	1316
	1317	case '<':
	1318	// another opening tag:
	1319	// that's an HTML error
	1320	AppendChar(pct,
	1321	*pct->pSource++);
	1322	fCont = FALSE;
	1323	break;
	1324
	1325	case 0:
	1326	fCont = FALSE;
	1327	break;
	1328	}
	1329	p2++;
	1330	}
	1331
	1332	if (pNextClose)
	1333	{
	1334	// end of tag found:
	1335	ULONG cbTag;
[91]	1336	// PSZ pStartOfAttrs = 0;
[8]	1337
	1338	if ((pNextSpace) && (pNextSpace < pNextClose))
	1339	{
	1340	// we have attributes:
	1341	cbTag = pNextSpace - (pStartOfTag + 1);
[91]	1342	// pStartOfAttrs = pNextSpace;
[8]	1343	}
	1344	else
	1345	cbTag = pNextClose - (pStartOfTag + 1);
	1346
	1347	if (!cbTag)
	1348	{
	1349	// happens if we have a "<>" in the text:
	1350	// just insert the '<>' and go on, we have no tag here
	1351	AppendChar(pct,
	1352	*pct->pSource++);
	1353	AppendChar(pct,
	1354	*pct->pSource++);
	1355	}
	1356	else
	1357	{
	1358	PFNPROCESSTAG pTagProcessor;
	1359
	1360	pct->cSaved = *(pStartOfTag + cbTag + 1);
	1361	// add a null terminator
	1362	*(pStartOfTag + cbTag + 1) = 0;
	1363
	1364	// find corresponding tag converter function
	1365	// from G_TagProcessors map
	1366	pTagProcessor = FindTagProcessor(pStartOfTag + 1); // pszTag);
	1367
	1368	// restore char under null terminator
	1369	*(pStartOfTag + cbTag + 1) = pct->cSaved;
	1370
	1371	// reset new source ptr; the tag handler
	1372	// can modify this
	1373	pct->pNewSource = NULL;
	1374
	1375	if (pTagProcessor)
	1376	{
	1377	// tag understood:
	1378
	1379	// terminate string after closing tag
	1380	pct->cSaved = *(pNextClose + 1); // can be null byte!
	1381	*(pNextClose + 1) = 0;
	1382
	1383	// did we have attributes?
	1384	if (pNextSpace)
	1385	pct->pszAttributes = pNextSpace;
	1386
	1387	// finally, call the tag handler
	1388	(pTagProcessor) // function
	1389	(pct); // argument
	1390
	1391	*(pNextClose + 1) = pct->cSaved;
	1392	}
	1393
	1394	if (pct->pNewSource == NULL)
	1395	// tag handler needs no special processing:
	1396	// skip '>' too
	1397	pct->pSource = pNextClose + 1;
	1398	else
	1399	// tag handler has skipped something:
	1400	pct->pSource = pct->pNewSource;
	1401	}
	1402	}
	1403	}
	1404	}
	1405
	1406	/*
	1407	*@@ ConvertEscape:
	1408	* called by HandleEscape to find the ANSI (CP 1004)
	1409	* character for the given escape sequence (pszTag).
	1410	*
[82]	1411	* pszTag must be null-terminated and contain only
	1412	* the stuff between "&" and ";".
[8]	1413	*
	1414	* This is really ugly spaghetti, but it's the fastest
	1415	* way to do it.
	1416	*
	1417	*@@added V0.9.4 (2000-06-10) [umoeller]
	1418	*/
	1419
[222]	1420	STATIC unsigned char ConvertEscape(PSZ pszTag)
[8]	1421	{
	1422	CHAR c0, c1;
	1423	CHAR crc = 0;
	1424
	1425	PSZ p2 = pszTag + 2;
	1426
	1427	c0 = *pszTag;
	1428	c1 = *(pszTag + 1);
	1429
	1430	switch (c0)
	1431	{
	1432	case 'a':
	1433	switch (c1)
	1434	{
	1435	case 'a':
	1436	if (strcmp(p2, "cute") == 0)
	1437	return 225;
	1438	break;
	1439
	1440	case 'c':
	1441	if (strcmp(p2, "irc") == 0)
	1442	return 226;
	1443	else if (strcmp(p2, "ute") == 0)
	1444	return 180;
	1445	break;
	1446
	1447	case 'e':
	1448	if (strcmp(p2, "lig") == 0)
	1449	return 230;
	1450	break;
	1451
	1452	case 'g':
	1453	if (strcmp(p2, "rave") == 0)
	1454	return 224;
	1455	break;
	1456
	1457	case 'm':
	1458	if (strcmp(p2, "p") == 0)
	1459	return '&';
	1460	break;
	1461
	1462	case 'r':
	1463	if (strcmp(p2, "ing") == 0)
	1464	return 229;
	1465	break;
	1466
	1467	case 't':
	1468	if (strcmp(p2, "ilde") == 0)
	1469	return 227;
	1470	break;
	1471
	1472	case 'u':
	1473	if (strcmp(p2, "ml") == 0)
	1474	return 228;
	1475	break;
	1476	}
	1477	break;
	1478
	1479	case 'b':
	1480	if (strcmp(pszTag + 1, "rvbar") == 0)
	1481	return 166;
	1482	break;
	1483
	1484	case 'c':
	1485	switch (c1)
	1486	{
	1487	case 'c':
	1488	if (strcmp(p2, "edil") == 0)
	1489	return 231;
	1490	break;
	1491
	1492	case 'e':
	1493	if (strcmp(p2, "dil") == 0)
	1494	return 184;
	1495	else if (strcmp(p2, "nt") == 0)
	1496	return 162;
	1497	break;
	1498
	1499	case 'o':
	1500	if (strcmp(p2, "py") == 0)
	1501	return 169;
	1502	break;
	1503
	1504	case 'u':
	1505	if (strcmp(p2, "rren") == 0)
	1506	return 164;
	1507	}
	1508	break;
	1509
	1510	case 'd':
	1511	switch (c1)
	1512	{
	1513	case 'e':
	1514	if (strcmp(p2, "g") == 0) return 176;
	1515	break;
	1516
	1517	case 'i':
	1518	if (strcmp(p2, "vide") == 0) return 247;
	1519	break;
	1520	}
	1521	break;
	1522
	1523	case 'e':
	1524	switch (c1)
	1525	{
	1526	case 'a':
	1527	if (strcmp(p2, "cute") == 0) return 233;
	1528	break;
	1529
	1530	case 'c':
	1531	if (strcmp(p2, "irc") == 0) return 234;
	1532	break;
	1533
	1534	case 'g':
	1535	if (strcmp(p2, "rave") == 0) return 232;
	1536	break;
	1537
	1538	case 't':
	1539	if (strcmp(p2, "h") == 0) return 240;
	1540	break;
	1541
	1542	case 'u':
	1543	if (strcmp(p2, "ml") == 0) return 235;
	1544	break;
	1545	}
	1546	break;
	1547
	1548	case 'f':
	1549	switch (c1)
	1550	{
	1551	case 'r':
	1552	if (strcmp(p2, "ac14") == 0) return 188;
	1553	if (strcmp(p2, "ac12") == 0) return 189;
	1554	if (strcmp(p2, "ac34") == 0) return 190;
	1555	break;
	1556	}
	1557	break;
	1558
	1559	case 'g':
	1560	switch (c1)
	1561	{
	1562	case 't':
	1563	if (*p2 == 0) return '>';
	1564	}
	1565	break;
	1566
	1567	case 'i':
	1568	switch (c1)
	1569	{
	1570	case 'a':
	1571	if (strcmp(p2, "cute") == 0) return 237;
	1572	break;
	1573
	1574	case 'c':
	1575	if (strcmp(p2, "irc") == 0) return 238;
	1576	break;
	1577
	1578	case 'g':
	1579	if (strcmp(p2, "rave") == 0) return 236;
	1580	break;
	1581
	1582	case 'e':
	1583	if (strcmp(p2, "xcl") == 0) return 161;
	1584	break;
	1585
	1586	case 'q':
	1587	if (strcmp(p2, "uest") == 0) return 191;
	1588	break;
	1589
	1590	case 'u':
	1591	if (strcmp(p2, "ml") == 0) return 239;
	1592	}
	1593	break;
	1594
	1595	case 'l':
	1596	switch (c1)
	1597	{
	1598	case 't':
	1599	if (*p2 == 0)
	1600	return '<';
	1601	break;
	1602
	1603	case 'a':
	1604	if (strcmp(p2, "quo") == 0) return 171;
	1605	}
	1606	break;
	1607
	1608	case 'm':
	1609	switch (c1)
	1610	{
	1611	case 'a':
	1612	if (strcmp(p2, "cr") == 0) return 175;
	1613	break;
	1614
	1615	case 'i':
	1616	if (strcmp(p2, "cro") == 0) return 181;
	1617	if (strcmp(p2, "ddot") == 0) return 183;
	1618	break;
	1619	}
	1620	break;
	1621
	1622	case 'n':
	1623	switch (c1)
	1624	{
	1625	case 'b':
	1626	if (strcmp(p2, "sp") == 0) return 160;
	1627	break;
	1628
	1629	case 'o':
	1630	if (strcmp(p2, "t") == 0) return 172;
	1631	break;
	1632
	1633	case 't':
	1634	if (strcmp(p2, "ilde") == 0) return 241;
	1635	}
	1636	break;
	1637
	1638	case 'o':
	1639	switch (c1)
	1640	{
	1641	case 'a':
	1642	if (strcmp(p2, "cute") == 0) return 243;
	1643	break;
	1644
	1645	case 'c':
	1646	if (strcmp(p2, "irc") == 0) return 244;
	1647	break;
	1648
	1649	case 'g':
	1650	if (strcmp(p2, "rave") == 0) return 242;
	1651	break;
	1652
	1653	case 'r':
	1654	if (strcmp(p2, "df") == 0) return 170;
	1655	if (strcmp(p2, "dm") == 0) return 186;
	1656	break;
	1657
	1658	case 's':
	1659	if (strcmp(p2, "lash") == 0) return 248;
	1660	break;
	1661
	1662	case 't':
	1663	if (strcmp(p2, "ilde") == 0) return 245;
	1664	break;
	1665
	1666	case 'u':
	1667	if (strcmp(p2, "ml") == 0) return 246;
	1668	}
	1669	break;
	1670
	1671	case 'p':
	1672	switch (c1)
	1673	{
	1674	case 'a':
	1675	if (strcmp(p2, "ra") == 0) return 182;
	1676	break;
	1677
	1678	case 'l':
	1679	if (strcmp(p2, "usmn") == 0) return 177;
	1680	break;
	1681
	1682	case 'o':
	1683	if (strcmp(p2, "und") == 0) return 163;
	1684	}
	1685	break;
	1686
	1687	case 'q':
	1688	if (strcmp(pszTag, "quot") == 0) return '"';
	1689	break;
	1690
	1691	case 'r':
	1692	if (strcmp(pszTag, "raquo") == 0) return 187;
	1693	if (strcmp(pszTag, "reg") == 0) return 174;
	1694	break;
	1695
	1696	case 's':
	1697	switch (c1)
	1698	{
	1699	case 'z':
	1700	if (strcmp(p2, "lig") == 0) return 223;
	1701	break;
	1702
	1703	case 'e':
	1704	if (strcmp(p2, "ct") == 0) return 167;
	1705	break;
	1706
	1707	case 'h':
	1708	if (strcmp(p2, "y") == 0) return 173;
	1709	break;
	1710
	1711	case 'u':
	1712	if (strcmp(p2, "p1") == 0) return 185;
	1713	if (strcmp(p2, "p2") == 0) return 178;
	1714	if (strcmp(p2, "p3") == 0) return 179;
	1715	}
	1716	break;
	1717
	1718	case 't':
	1719	if (strcmp(pszTag, "thorn") == 0) return 254;
	1720	if (strcmp(pszTag, "times") == 0) return 215;
	1721	break;
	1722
	1723	case 'u':
	1724	switch (c1)
	1725	{
	1726	case 'a':
	1727	if (strcmp(p2, "cute") == 0) return 250;
	1728	break;
	1729
	1730	case 'c':
	1731	if (strcmp(p2, "irc") == 0) return 251;
	1732	break;
	1733
	1734	case 'g':
	1735	if (strcmp(p2, "rave") == 0) return 249;
	1736	break;
	1737
	1738	case 'm':
	1739	if (strcmp(p2, "l") == 0) return 168;
	1740	break;
	1741
	1742	case 'u':
	1743	if (strcmp(p2, "ml") == 0) return 252;
	1744	}
	1745	break;
	1746
	1747	case 'y':
	1748	if (strcmp(pszTag, "yacute") == 0) return 253;
	1749	if (strcmp(pszTag, "yen") == 0) return 165;
	1750	if (strcmp(pszTag, "yuml") == 0) return 255;
	1751	break;
	1752
	1753	case 'A':
	1754	switch (c1)
	1755	{
	1756	case 'u':
	1757	if (strcmp(p2, "ml") == 0) return 196;
	1758	break;
	1759
	1760	case 'a':
	1761	if (strcmp(p2, "cute") == 0) return 193;
	1762	break;
	1763
	1764	case 'c':
	1765	if (strcmp(p2, "irc") == 0) return 194;
	1766	break;
	1767
	1768	case 'E':
	1769	if (strcmp(p2, "lig") == 0) return 198;
	1770	break;
	1771
	1772	case 'g':
	1773	if (strcmp(p2, "rave") == 0) return 192;
	1774	break;
	1775
	1776	case 'r':
	1777	if (strcmp(p2, "ing") == 0) return 197;
	1778	break;
	1779
	1780	case 't':
	1781	if (strcmp(p2, "ilde") == 0) return 195;
	1782	}
	1783	break;
	1784
	1785	case 'C':
	1786	if (strcmp(pszTag, "Ccedil") == 0) return 199;
	1787	break;
	1788
	1789	case 'E':
	1790	if (strcmp(pszTag, "Ecirc") == 0) return 202;
	1791	if (strcmp(pszTag, "Eacute") == 0) return 201;
	1792	if (strcmp(pszTag, "Egrave") == 0) return 200;
	1793	if (strcmp(pszTag, "ETH") == 0) return 208;
	1794	if (strcmp(pszTag, "Euml") == 0) return 203;
	1795	break;
	1796
	1797	case 'I':
	1798	if (strcmp(pszTag, "Icirc") == 0) return 206;
	1799	if (strcmp(pszTag, "Iacute") == 0) return 205;
	1800	if (strcmp(pszTag, "Igrave") == 0) return 204;
	1801	if (strcmp(pszTag, "Iuml") == 0) return 207;
	1802	break;
	1803
	1804	case 'N':
	1805	if (strcmp(pszTag, "Ntilde") == 0) return 209;
	1806	break;
	1807
	1808	case 'O':
	1809	switch (c1)
	1810	{
	1811	case 'u':
	1812	if (strcmp(p2, "ml") == 0) return 214;
	1813	break;
	1814
	1815	case 'a':
	1816	if (strcmp(p2, "cute") == 0) return 211;
	1817	break;
	1818
	1819	case 'c':
	1820	if (strcmp(p2, "irc") == 0) return 212;
	1821	break;
	1822
	1823	case 'g':
	1824	if (strcmp(p2, "rave") == 0) return 210;
	1825	break;
	1826
	1827	case 't':
	1828	if (strcmp(p2, "ilde") == 0) return 213;
	1829	break;
	1830
	1831	case 's':
	1832	if (strcmp(p2, "lash") == 0) return 216;
	1833	}
	1834	break;
	1835
	1836	case 'U':
	1837	switch (c1)
	1838	{
	1839	case 'a':
	1840	if (strcmp(p2, "cute") == 0) return 218;
	1841	break;
	1842
	1843	case 'c':
	1844	if (strcmp(p2, "irc") == 0) return 219;
	1845	break;
	1846
	1847	case 'g':
	1848	if (strcmp(p2, "rave") == 0) return 217;
	1849	break;
	1850
	1851	case 'u':
	1852	if (strcmp(p2, "ml") == 0) return 220;
	1853	}
	1854	break;
	1855
	1856	case 'T':
	1857	if (strcmp(pszTag, "THORN") == 0) return 222;
	1858	break;
	1859
	1860	case 'Y':
	1861	if (strcmp(pszTag, "Yacute") == 0) return 221;
	1862	break;
	1863	}
	1864
[238]	1865	return crc;
[8]	1866	}
	1867
	1868	/*
	1869	*@@ HandleEscape:
	1870	* called by txvConvertFromHTML when a "&" character
	1871	* is found in the source buffer. This calls
	1872	* ConvertEscape in turn.
	1873	*
	1874	*@@added V0.9.3 (2000-05-18) [umoeller]
	1875	*/
	1876
[222]	1877	STATIC VOID HandleEscape(PCOPYTARGET pct)
[8]	1878	{
	1879	// ampersand:
	1880	// replace special characters
	1881	PSZ pStartOfTag = pct->pSource;
	1882	// find end of tag
	1883	PSZ p2 = pStartOfTag,
	1884	pNextClose = 0,
	1885	pNextSpace = 0;
	1886	BOOL fCont = TRUE;
	1887	while (fCont)
	1888	{
	1889	switch (*p2)
	1890	{
	1891	case 0:
	1892	fCont = FALSE;
	1893	break;
	1894
	1895	case ';':
	1896	pNextClose = p2;
	1897	fCont = FALSE;
	1898	break;
	1899
	1900	case ' ':
	1901	if (!pNextSpace)
	1902	pNextSpace = p2;
	1903	break;
	1904	}
	1905	p2++;
	1906	}
	1907
	1908	if (!pNextClose)
	1909	// no closing tag found:
	1910	// just insert the '&' and go on, we have no tag here
	1911	AppendChar(pct,
	1912	*pct->pSource++);
	1913	else
	1914	{
	1915	if ((pNextSpace) && (pNextSpace < pNextClose))
	1916	// space before ';':
	1917	// just insert the '&' and go on, we have no tag here
	1918	AppendChar(pct,
	1919	*pct->pSource++);
	1920	else if ((!pNextClose) \|\| (pNextClose <= pStartOfTag + 1))
	1921	AppendChar(pct,
	1922	*pct->pSource++);
	1923	else
	1924	{
	1925	ULONG ulCode = 0;
	1926
	1927	// create substring with tag
	1928	PSZ pszTag = pStartOfTag + 1;
	1929	*pNextClose = 0;
	1930
	1931	if (*pszTag == '#')
	1932	{
	1933	// latin-1 or Unicode encoding ()
	1934	ulCode = atoi(pszTag + 1);
	1935
	1936	// next input: char after ';'
	1937	pct->pSource = pNextClose + 1;
	1938	}
	1939	else
	1940	{
	1941	// named entity:
	1942	// find char code corresponding to escape
	1943	// from G_EscapeProcessors map
	1944	ulCode = ConvertEscape(pszTag);
	1945	if (ulCode)
	1946	// tag supported:
	1947	pct->pSource = pNextClose + 1;
	1948	else
	1949	// tag not supported:
	1950	ulCode = *pct->pSource++;
	1951	}
	1952
	1953	// restore closing tag which we overwrote
	1954	*pNextClose = ';';
	1955
	1956	if (ulCode)
	1957	{
	1958	AppendLinebreakCheck(pct);
	1959
	1960	AppendChar(pct,
	1961	(CHAR)ulCode);
	1962	pct->fSkipNextSpace = FALSE;
	1963	}
	1964	}
	1965	}
	1966	}
	1967
	1968	/* ******************************************************************
[14]	1969	*
	1970	* Entry points
	1971	*
[8]	1972	********************************************************************/
	1973
	1974	/*
	1975	*@@ txvConvertFromHTML:
	1976	* this modifies the given text string (which should
	1977	* be the complete BODY block of any HTML file) so
	1978	* that all HTML tags are removed and replaced with
	1979	* escape sequences that the XTextView control understands.
	1980	*
	1981	* The buffer gets reallocated by this function, so it
	1982	* must be free()'able.
	1983	*
	1984	* So, to have the XTextView control display an HTML file,
	1985	* do this:
	1986	*
	1987	* 1) Load an HTML file into a buffer allocated by malloc().
	1988	*
	1989	* 2) Call txvConvertFromHTML.
	1990	*
	1991	* 3) Call WinSetWindowText on the XTextView control with
	1992	* the modified buffer.
	1993	*
	1994	* This understands the following limited subset of HTML:
	1995	*
	1996	* Paragraph tags:
	1997	*
	1998	* -- P, BR
	1999	* -- PRE, /PRE
	2000	* -- UL, /UL, OL, /OL, LI
	2001	* -- DL, /DL, DT, DD
	2002	* -- H1, /H1 thru H6, /H6
	2003	* -- Comments (<!-- .... -->)
	2004	*
	2005	* Character tags:
	2006	*
	2007	* -- B, /B, STRONG, /STRONG
	2008	* -- I, /I, EM, /EM, VAR, /VAR, CITE, /CITE
	2009	* -- CODE, /CODE, SAMP, /SAMP, KBD, /KBD, TT, /TT
	2010	* -- U, /U
	2011	* -- STRIKE, /STRIKE
	2012	* -- CODE, /CODE
	2013	*
	2014	* The most obvious limitation is that neither tables
	2015	* nor frames are supported. Also forget about CSS
	2016	* and JavaScript, of course.
	2017	*
	2018	* All the ampersand (& something) sequences defined
	2019	* in HTML 3 are properly translated.
	2020	*
	2021	* Note: Those are translated to the ANSI (MS-Windows,
	2022	* OS/2 codepage 1004) character set. This has the
	2023	* following characteristics:
	2024	*
	2025	* -- Codes 0-127 are identical to ASCII and thus
	2026	* ISO 8859-1 ("Latin 1") also.
	2027	*
	2028	* -- Codes 160-255 are identical to ISO 8859-1 ("Latin 1").
	2029	*
	2030	* -- Codes 128-159 are NOT defined in ISO 8859-1, but
	2031	* Netscape treats those as ANSI as well, so we do too.
	2032	*
	2033	* As a result, consider the output to be in OS/2 codepage
	2034	* 1004. Either set your codepage to that (WinSetCp)
	2035	* or translate the output (WinCpTranslateString).
	2036	*
	2037	* &#xxx; tags (with xxx being a decimal) are considered
	2038	* ANSI codes as well. Even though HTML 4.0 allows Unicode
	2039	* characters > 255 to be inserted this way, we ignore
	2040	* those. Unicode chars from 0 to 255 are identical to
	2041	* ANSI, so for &#0; to &#255;, we are HTML-compliant.
	2042	*
	2043	* All other tags are completely thrown out.
	2044	*
	2045	*@@added V0.9.3 (2000-05-06) [umoeller]
[201]	2046	*@@changed V0.9.20 (2002-08-10) [umoeller]: changed prototype
[8]	2047	*/
	2048
[201]	2049	BOOL txvConvertFromHTML(PSZ *ppszText, // in/out: text (gets reallocated)
	2050	PSZ *ppszTitle, // out: if != NULL, receives malloc'd buffer with HTML title
[8]	2051	PULONG pulProgress, // out: progress (ptr can be NULL)
	2052	PBOOL pfCancel) // in: cancel flag (ptr can be NULL)
	2053	{
	2054	BOOL brc = TRUE;
	2055
	2056	ULONG cbSource = strlen(*ppszText);
	2057
	2058	COPYTARGET ct = {0};
	2059
	2060	lstInit(&ct.llLists,
	2061	TRUE); // free items
	2062
[201]	2063	ct.ppszTitle = ppszTitle; // V0.9.20 (2002-08-10) [umoeller]
	2064	// can be NULL
	2065
[8]	2066	ct.pSource = *ppszText;
	2067	// skip leading spaces
	2068	ct.fSkipNextSpace = TRUE;
	2069
	2070	// step 2:
	2071	// actual tags formatting
	2072
	2073	while (TRUE)
	2074	{
	2075	CHAR c = *ct.pSource;
	2076
	2077	if (pfCancel)
	2078	if (*pfCancel)
	2079	{
	2080	brc = FALSE;
	2081	break;
	2082	}
	2083
	2084	if (!c)
	2085	// null terminator reached:
	2086	break;
	2087
	2088	// calculate progress
	2089	if (pulProgress)
	2090	pulProgress = ((ct.pSource - ppszText) // characters done
	2091	* 100
	2092	/ cbSource); // characters total
	2093
	2094	switch (c)
	2095	{
	2096	case '<':
	2097	HandleTag(&ct);
	2098	break;
	2099
	2100	case '&':
	2101	HandleEscape(&ct);
	2102	break;
	2103
	2104	case '\r':
	2105	// skip
	2106	if (!ct.fSkipNextSpace)
	2107	{
	2108	AppendChar(&ct,
	2109	' ');
	2110	// ct.fNeedsLinebreak = FALSE;
	2111	// but skip leading spaces which might follow
	2112	if (!ct.fPRE)
	2113	ct.fSkipNextSpace = TRUE;
	2114	}
	2115	ct.pSource++;
	2116	break;
	2117
	2118	case '\t':
	2119	{
	2120	if (ct.fPRE)
	2121	{
	2122	ULONG ul;
	2123	for (ul = 0;
	2124	ul < 8;
	2125	ul++)
	2126	AppendChar(&ct,
	2127	' ');
	2128	}
	2129	else
	2130	{
	2131	// not in PRE block:
	2132	if ( (!ct.fSkipNextSpace)
	2133	// && (!ct.fNeedsLinebreak)
	2134	)
	2135	// last was not space: copy
	2136	AppendChar(&ct,
	2137	' ');
	2138
	2139	ct.fSkipNextSpace = TRUE;
	2140	}
	2141
	2142	// skip the tab
	2143	ct.pSource++;
	2144	break; }
	2145
	2146	case '\n':
	2147	{
	2148	// newline char:
	2149	if (!ct.fPRE)
	2150	{
	2151	// if not in PRE mode, replace with space
	2152	if (!ct.fSkipNextSpace)
	2153	{
	2154	AppendChar(&ct,
	2155	' ');
	2156	// ct.fNeedsLinebreak = FALSE;
	2157	// but skip leading spaces which might follow
	2158	ct.fSkipNextSpace = TRUE;
	2159	}
	2160	}
	2161	else
	2162	// in PRE mode, preserve line breaks
	2163	AppendChar(&ct, '\n'); // ct.fNeedsLinebreak = TRUE;
	2164
	2165	ct.pSource++;
	2166	break; }
	2167
	2168	case '\xFF':
	2169	{
	2170	AppendChar(&ct,
	2171	' ');
	2172	ct.pSource++;
	2173	break; }
	2174
	2175	case ' ':
	2176	if (!ct.fPRE)
	2177	{
	2178	// is space, and not in PRE block:
	2179	if ( (!ct.fSkipNextSpace)
	2180	// && (!ct.fNeedsLinebreak)
	2181	)
	2182	// last was not space: copy
	2183	AppendChar(&ct,
	2184	' ');
	2185
	2186	ct.fSkipNextSpace = TRUE;
	2187	}
	2188	else
	2189	// in PRE, always add all spaces
	2190	AppendChar(&ct,
	2191	' ');
	2192	ct.pSource++;
	2193	break;
	2194
	2195	default:
	2196	// if we're not inserting escapes or anything,
	2197	// check if a linebreak is needed
	2198	AppendLinebreakCheck(&ct);
	2199
	2200	AppendChar(&ct,
	2201	*ct.pSource++);
	2202	ct.fSkipNextSpace = FALSE;
	2203	ct.fSkipNextLinebreak = FALSE;
	2204
	2205	} // end switch (*pSource);
	2206	} // end while (*pSource)
	2207	AppendChar(&ct,
	2208	'\n');
	2209	// append null-terminator
	2210	AppendChar(&ct,
	2211	0);
	2212
	2213	free(*ppszText);
	2214	*ppszText = ct.pszNew;
	2215
	2216	lstClear(&ct.llLists);
	2217
[167]	2218	return brc;
[8]	2219	}
	2220
	2221

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/src/helpers/textv_html.c

Download in other formats: