/*
* bs_string.h:
* header file for the various string classes.
*
* This declares BSString, BSUString, and
* BSUniCodec, among other things. These give
* you not only a very efficient basic string
* class (BSString), but also full Unicode
* and codepage support and conversion between
* them.
*
* Warning: if you include this header, you'll
* get a typedef for "string" to BSString and
* for "ustring" to BSUString, which might
* conflict with other string classes that you
* might be using.
*
*@@include #include "helpers\stringh.h"
*@@include #include "helpers\xstring.h"
*@@include #include "base\bs_string.h"
*/
/*
* This file Copyright (C) 1999-2020 Ulrich Möller.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, in version 2 as it comes in the COPYING
* file of this distribution.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef WARPIN_STRING_HEADER_INCLUDED
#define WARPIN_STRING_HEADER_INCLUDED
// Index/length type used by all string classes in this header
// (positions, counts, and the BSStringBase::npos constant).
typedef unsigned long size_type;
// Forward declarations: BSUniCodec (below) takes references to both
// string classes before their definitions appear.
class BSString;
class BSUString;
// String locking is currently disabled: the mutex declaration and the
// locking variant of STRINGLOCK are commented out, so STRINGLOCK
// expands to nothing.
// extern BSMutex G_mtxStrings;
// #define STRINGLOCK BSLock lock(G_mtxStrings)
#define STRINGLOCK
/*
*@@ BSUniCodec:
* Unicode conversion object (COder and
* DECoder).
*
* Create one such object to allow conversion
* from UTF-8 to codepage-specific and back.
* An instance of this is required as input
* to the conversion methods from BSString
* to BSUString and vice versa, such as:
*
* -- BSString::assignUtf8 (convert UTF-8
* to codepage-specific)
*
* -- BSString::appendUtf8 (append UTF-8
* to codepage-specific)
*
* -- BSUString::assignCP (convert
* codepage-specific to UTF-8)
*
* This thing throws BSExcptBase if the
* input codepage is not supported or if
* we run into invalid code sequences.
*
*@@added V0.9.18 (2002-03-08) [umoeller]
*/
class BSUniCodec : public BSRoot
{
    private:
        // Opaque handle used by the conversion routines; what it points
        // to is defined by the implementation, not by this header.
        void            *_pCodec;
        // Codepage value reported by QueryCodepage().
        unsigned short  _usCodepage;
        // NOTE(review): presumably DBCS lead-byte range data for the
        // codepage -- confirm against the implementation.
        char            _achDBCS[12];
        BOOL            _fDouble;       // TRUE if double-byte codepage

    public:
        DECLARE_CLASS(BSUniCodec);

        // Creates a codec for the given codepage. Per the class
        // description, BSExcptBase is thrown if the codepage is not
        // supported.
        BSUniCodec(unsigned short usCodepage);

        ~BSUniCodec();

        // Conversion from codepage-specific text to a BSUString.
        // NOTE(review): presumably reads ulLength bytes from pcszCP and
        // stores the UTF-8 result in ustr -- confirm in the implementation.
        void Codepage2Uni(BSUString &ustr,
                          const char *pcszCP,
                          unsigned long ulLength);

        // Conversion from UTF-8 text to a codepage-specific BSString.
        // NOTE(review): presumably reads ulLength bytes from pcszUni and
        // stores the converted result in str -- confirm in the implementation.
        void Uni2Codepage(BSString &str,
                          const char *pcszUni,
                          unsigned long ulLength);

        /*
         *@@ QueryCodepage:
         *      returns the codepage that this
         *      BSUniCodec was created for.
         *
         *      This is a pure accessor and is declared const
         *      so that it can also be called through a const
         *      BSUniCodec reference or pointer.
         */
        inline unsigned short QueryCodepage()
                              const
        {
            return _usCodepage;
        }
};
/*
*@@ BSStringBuf:
* string buffer encapsulation for BSStringBase.
*
* Several BSStringBase instances can share
* the same string buffer when copied from
* one another. This is new with V0.9.18,
* while previously each BSString had its own
* buffer, which led to a lot of redundant
* copying.
*
* Note that all methods are private in order
* not to let anyone mess with these things.
*
*@@added V0.9.18 (2002-03-08) [umoeller]
*/
class BSStringBuf : public BSRoot
{
    friend class BSStringBase;
    friend class BSString;
    friend class BSUString;

    private:
        XSTRING         _str;           // the actual string storage
        size_t          *_pShiftTable;  // heap-allocated shift table, or NULL;
                                        // released by ClearShiftTable()
        unsigned long   _fRepeat;       // reset together with the shift table
        unsigned long   _cShared;       // 0 while only one string instance
                                        // uses this buffer; otherwise the
                                        // no. of BSString's sharing it

        DECLARE_CLASS(BSStringBuf);

        /*
         *@@ BSStringBuf:
         *      regular constructor. Initializes the
         *      member XSTRING with cbAllocate and then
         *      copies ulLength bytes from the given
         *      C string into it. The new buffer starts
         *      out unshared and without a shift table.
         */
        BSStringBuf(const char *pcsz,
                    unsigned long ulLength,
                    unsigned long cbAllocate)
            : BSRoot(tBSStringBuf),
              _pShiftTable(NULL),
              _fRepeat(0),
              _cShared(0)
        {
            xstrInit(&_str, cbAllocate);
            xstrcpy(&_str, pcsz, ulLength);
        }

        /*
         *@@ BSStringBuf:
         *      "take-over" constructor. The given
         *      XSTRING is copied bytewise into the
         *      member and the source is then zeroed
         *      so that the caller will not free the
         *      memory which is now owned by this
         *      buffer. Used by BSString::_take_from
         *      only.
         */
        BSStringBuf(XSTRING &str)
            : BSRoot(tBSStringBuf),
              _pShiftTable(NULL),
              _fRepeat(0),
              _cShared(0)
        {
            // steal the contents...
            memcpy(&_str, &str, sizeof(_str));
            // ...and wipe the source so it no longer refers to them
            memset(&str, 0, sizeof(str));
        }

        /*
         *@@ ~BSStringBuf:
         *      destructor. Releases the shift table
         *      (if any) and then the string memory.
         */
        ~BSStringBuf()
        {
            ClearShiftTable();
            xstrClear(&_str);
        }

        /*
         *@@ ClearShiftTable:
         *      discards the internal shift table;
         *      to be called whenever the string
         *      buffer has changed. Always resets
         *      _fRepeat as well.
         */
        inline void ClearShiftTable()
        {
            _fRepeat = FALSE;

            if (_pShiftTable != NULL)
            {
                free(_pShiftTable);
                _pShiftTable = NULL;
            }
        }
};
/*
*@@ BSStringBase:
* common superclass for BSString and
* BSUString. This implements most of
* the functionality for the two, but
* since the constructor is protected,
* this cannot be created as such.
*
* BSString and BSUString are implemented
* as subclasses of this to prevent illegal
* assignments between codepage-specific
* and UTF-8 strings already by the compiler.
*
* This has a BSStringBuf pointer as
* a member so that several strings can
* share the same memory.
*
*@@added V0.9.18 (2002-03-08) [umoeller]
*/
class BSStringBase : public BSRoot
{
public:
DECLARE_CLASS(BSStringBase);
#ifdef __IBMCPP__
friend class BSStringBase;
#endif
// 2000-01-15: This line is extraordinary useless - class instances
// of the same type are always friends (czw)
// V0.9.1 (2000-02-01) [umoeller]: Hi Jens. Not with VAC++, apparently,
// because without this line, this wouldn't compile.
protected:
BSStringBuf *_pBuf;
/********************************************
*
* protected methods
*
********************************************/
/*
*@@ Init:
* private helper method to initialize
* the instance. Called by all constructors.
*/
void Init();
void FreeBuf();
void CopyFrom(const BSStringBase &s);
void CopyFrom(const BSStringBase &s,
size_type ulPos,
size_type n = npos);
void CopyFrom(const char *psz);
void CopyFrom(const char *p1, const char *p2);
void CopyFrom(char c);
BSStringBase(BSClassID &Class);
private:
/*
*@@ BSStringBase:
* private default copy constructor
* to make sure this can never be
* called by the subclasses.
*/
BSStringBase(const BSStringBase &s)
: BSRoot(s._Class)
{
};
protected:
BSStringBase(const BSStringBase &s,
BSClassID &Class);
virtual ~BSStringBase();
public:
static const size_type npos;
/********************************************
*
* property queries
*
********************************************/
/*
*@@ size:
* returns the no. of characters in
* the string (excluding our internal
* null terminator) or 0 if the string
* is empty.
*
* Be warned, if the string is UTF-8
* encoded, this will not take multi-byte
* encodings into account. This returns
* the number of bytes, not the number
* of UTF characters.
*/
inline size_type size()
const
{
return ((_pBuf) ? (_pBuf->_str.ulLength) : 0);
}
/*
*@@ length:
* STL string compatibility function,
* same as BSStringBase::size().
*/
inline size_type length()
const
{
return (size());
}
/*
*@@ capacity:
* returns the amount of memory presently
* allocated for the string. This is at
* least what size() or length() return,
* but can be more.
*
*@@added V0.9.6 (2000-10-31) [umoeller]
*/
inline size_type capacity()
const
{
return ((_pBuf) ? (_pBuf->_str.cbAllocated) : 0);
}
/*
*@@ operator():
* returns 1 (TRUE) if the string
* contains something, 0 (FALSE)
* otherwise.
*
* Note: as far as I know, this operator
* is NOT defined with the C++ string class.
*/
inline int operator()()
const
{
return (size() != 0);
}
/*
*@@ empty:
* returns 1 (TRUE) if the string
* contains nothing, 0 (FALSE) otherwise.
*/
inline int empty()
const
{
return (size() == 0);
}
/********************************************
*
* assignment, modification
*
********************************************/
void reserve(size_type stExtra = 0);
void _take_from(XSTRING &str);
size_type _printf(const char *pszFormatString, ...);
void _itoa10(int value, char cThousands);
BSStringBase& erase(size_type ulPos = 0, size_type n = npos);
/********************************************
*
* comparison
*
********************************************/
protected:
int compare(const BSStringBase &s) const;
int compare(const char *psz) const;
/********************************************
*
* find, replace
*
********************************************/
size_type _find_replace(BSStringBase &strFind,
const BSStringBase &strReplaceWith,
size_type *pulPos);
size_type _find_replace(const char *pszFind,
const BSStringBase &strReplaceWith,
size_type *pulPos);
public:
size_type _find_replace(char cFind,
char cReplace,
size_type *pulPos);
/********************************************
*
* misc
*
********************************************/
/*
*@@ swap:
* swaps the contents of this instance
* with that of another BSStringBase.
*
* This is very fast because only the
* pointers are exchanged.
*
*@@added V0.9.6 (2000-10-31) [umoeller]
*/
void swap(BSStringBase &str)
{
BSStringBuf *pBuf = str._pBuf;
str._pBuf = _pBuf;
_pBuf = pBuf;
}
};
/*
*@@ BSString:
* string class vaguely modelled after the C++ ANSI
* string class which IBM VAC 3.08 has no support for.
*
* This implements the "BSString" class and typedefs "string"
* to "BSString". As a result, you can use either "BSString"
* or "string".
*
* Introduction
*
* If you're not familiar with the C++ "string" class, here's
* a short introduction.
*
* Basically, the "string" class allows you to use strings
* as a basic type, like int, char, long, and so on, without
* having to worry about memory management.
*
* Here is an example:
+
+ string str = "Hello.";
+
+ if (str == "Hello")
+ {
+ string str = "We should say ";
+ string str2 = str + "Hello now";
+ str2 += '.';
+
+ printf("%s", str2.c_str());
+ }
*
* This shows some of the most convenient features of the
* string class:
*
* -- You can just create an instance of it on the stack.
* The destructor will automatically free the memory
* that has been allocated for the string -- no more
* memory leaks as with the C functions.
*
* -- You can compare strings using ==, !=, <, >. (This
* calls strcmp internally.)
*
* -- You can concatenate strings using the "+" operator.
*
* -- If you need a C-type string (e.g. for OS/2 API
* functions), use the BSString::c_str() method,
* which returns a "const char*" pointer to the string
* in the class.
*
* In addition, the string class has methods for more
* complicated things, such as BSString::find and
* BSString::replace.
*
* Finally, this class is NOT thread-safe. If methods are
* operating on the same instance on several threads at
* the same time, this will DEFINITELY crash.
*
* C++ ANSI "string" class compatibility
*
* Unless marked otherwise, all methods and operators are
* compatible with the standard "string" class.
*
* However, we only support single-byte (8-bit) character
* sets at this point, and only a subset of the "string"
* methods and operators. But since WarpIN uses UTF-8
* for encoding things, this is not really a problem.
*
* Non-standard functions are marked with a leading
* underscore (e.g. BSString::_printf).
*
* Changes
*
* With V0.9.6, I have made improvements on memory management.
* Memory no longer gets reallocated with every change, but
* only if necessary.
*
* With V0.9.18, I finally implemented buffer sharing as
* with the C++ string class. This allows several BSString
* instances to share the same memory buffer. See BSStringBuf.
*
* In addition, V0.9.18 added Unicode support. See BSUString
* and BSUniCodec for more information.
*
* All the V0.9.18 changes are backward-compatible. So even
* though the implementation has changed significantly,
* all old code should still compile.
*/
class BSString : public BSStringBase
{
public:
DECLARE_CLASS(BSString);
/*
*@@ BSString:
* default constructor to create an empty string.
*/
BSString()
: BSStringBase(tBSString)
{
}
/*
*@@ BSString:
* default copy constructor;
* required for exception handling!
*/
BSString(const BSString &s)
: BSStringBase(s, tBSString)
{
}
BSString(const BSString &s,
size_type ulPos,
size_type n = npos);
BSString(const char *psz);
BSString(const char *p1, const char *p2);
BSString(char c);
BSString(BSUniCodec *pCodec,
const BSUString &ustr);
/********************************************
*
* property queries
*
********************************************/
char operator[](const size_type ul) const;
/*
*@@ c_str:
* returns the string member as a
* classic const C string. Note that
* this _never_ returns NULL. If the
* member string is empty, this
* returns a pointer to a null byte.
*/
inline const char* c_str()
const
{
return ((_pBuf && _pBuf->_str.psz) ? _pBuf->_str.psz : "");
}
/********************************************
*
* assignment, modification
*
********************************************/
BSString& assign(const BSString &s);
BSString& assign(const BSString &s,
size_type ulPos,
size_type n = npos);
BSString& assign(const char *psz);
BSString& assign(const char *p1, const char *p2);
BSString& assign(char c);
BSString& assignUtf8(BSUniCodec *pCodec,
const BSUString &ustr);
/*
*@@ operator=:
*
*/
inline BSString& operator=(const BSString &s)
{
return (assign(s));
}
/*
*@@ operator=:
*
*/
inline BSString& operator=(const char *psz)
{
return (assign(psz));
}
/*
*@@ operator=:
*
*/
inline BSString& operator=(char c)
{
return (assign(c));
}
BSString& append(const BSString &s);
BSString& append(const char *psz);
BSString& append(char c);
BSString& appendUtf8(BSUniCodec *pCodec, const BSUString &s);
/*
*@@ operator+=:
* shortcut to append(BSStringBase &s).
*
*@@changed V0.9.2 (2000-03-29) [umoeller]: added return *this
*/
inline BSString& operator+=(const BSString &s) // in: string to append
{
return (append(s));
}
/*
*@@ operator+=:
* shortcut to append(const char *psz).
*
*@@changed V0.9.2 (2000-03-29) [umoeller]: added return *this
*/
inline BSString& operator+=(const char *psz) // in: string to append
{
return (append(psz));
}
/*
*@@ operator+=:
* shortcut to append(char c).
*
*@@changed V0.9.2 (2000-03-29) [umoeller]: added return *this
*/
inline BSString& operator+=(char c) // in: character to append
{
return (append(c));
}
/********************************************
*
* comparison
*
********************************************/
/*
*@@ compare:
*
*/
inline int compare(const BSString &s) const
{
return BSStringBase::compare(s);
}
/*
*@@ compare:
*
*/
inline int compare(const char *psz) const
{
return BSStringBase::compare(psz);
}
int compare(size_type ulPos, size_type n, const BSString &s) const;
/********************************************
*
* find, replace
*
********************************************/
size_type find(const BSString &strFind, size_type ulPos = 0) const;
size_type find(const char *pszFind, size_type ulPos = 0) const;
size_type find(char c, size_type ulPos = 0) const;
size_type rfind(char c, size_type ulPos = npos) const;
size_type find_first_of(char c, size_type ulPos = 0) const;
size_type find_first_of(const char *achChars, size_type ulPos = 0) const;
size_type find_first_not_of(char c, size_type ulPos = 0) const;
size_type find_first_not_of(const char *achChars, size_type ulPos = 0) const;
BSString& replace(size_type ulPosThis,
size_type nThis,
const BSString &strReplace);
/* BSString& replace(size_type ulPosThis,
size_type nThis,
const BSString &strReplace,
size_type ulPosReplace = 0,
size_type nReplace = npos);
BSString& replace(size_type ulPosThis,
size_type nThis,
const char *pszReplace,
size_type nReplace);
BSString& replace(size_type ulPosThis,
size_type nThis,
const char *pszReplace); */
size_type _find_word(BSString &strFind,
size_type ulPos = 0,
const char *pcszBeginChars = "\x0d\x0a ()/\\-,.;*",
const char *pcszEndChars = "\x0d\x0a ()/\\-,.:;*&'")
const;
size_type _find_word(const char *pszFind,
size_type ulPos = 0,
const char *pcszBeginChars = "\x0d\x0a ()/\\-,.;*",
const char *pcszEndChars = "\x0d\x0a ()/\\-,.:;*&'")
const;
/*
*@@ _find_replace:
*
*/
inline size_type _find_replace(BSString &strFind,
const BSString &strReplaceWith,
size_type *pulPos)
{
return BSStringBase::_find_replace(strFind, strReplaceWith, pulPos);
}
/*
*@@ _find_replace:
*
*/
inline size_type _find_replace(const char *pszFind,
const BSString &strReplaceWith,
size_type *pulPos)
{
return BSStringBase::_find_replace(pszFind, strReplaceWith, pulPos);
}
/*
*@@ _find_replace:
*
*/
inline size_type _find_replace(char cFind,
char cReplace,
size_type *pulPos)
{
return BSStringBase::_find_replace(cFind, cReplace, pulPos);
}
BSString substr(size_type ulPos = 0,
size_type n = npos)
const;
int _extract_word(unsigned long ulIndex,
BSString &strTarget,
size_type ulPos = 0,
const char *pcszBeginChars = "\x0d\x0a ()/\\-,.;*",
const char *pcszEndChars = "\x0d\x0a ()/\\-,.:;*&'")
const;
void _format();
};
// Free comparison operators for BSString. Each one is declared for
// BSString/BSString; == and != are also declared for C string/BSString
// and BSString/C string. All of them return int, not bool.
int operator==(const BSString &s1, const BSString &s2);
int operator==(const char *psz1, const BSString &s2);
int operator==(const BSString &s1, const char *psz2);
int operator!=(const BSString &s1, const BSString &s2);
int operator!=(const char *psz1, const BSString &s2);
int operator!=(const BSString &s1, const char *psz2);
// Ordering: operator< is the only relational operator declared in
// this header (there is no operator>, <= or >= here).
int operator<(const BSString &s1, const BSString &s2);
// Concatenation; each overload returns a new BSString by value.
BSString operator+(const BSString &s1, const BSString &s2);
BSString operator+(const char *psz1, const BSString &s2);
BSString operator+(const BSString &s1, const char *psz2);
/*
 *@@ string:
 *      typedef for using BSStrings like the
 *      standard string class.
 *
 *      This is a typedef at global scope, so
 *      it can conflict with any other "string"
 *      class that is visible where this header
 *      is included.
 */
typedef BSString string;
/*
*@@ BSUString:
* BSStringBase subclass (a sibling of BSString)
* for marking strings that are not plain ASCII,
* but UTF-8 encoded. This is used in some
* declarations to make sure that interfaces
* can react properly.
*
* Most notably, this has restrictions on the
* assignment and conversion methods. In order
* to not accidentally assign codepage strings
* to a BSUString, regular assignments from
* const char * to BSUStrings are not allowed.
* Use BSUString::assignUtf8 for this.
*
* In detail, the following things do not work:
*
* -- extracting substrings, because with UTF-8
* you can never be sure if an offset points
* into the middle of a multi-byte character;
*
* -- c_str() does not exist to avoid accidentally
* passing the member buffer to legacy APIs.
* Use BSUString::GetBuffer() instead.
*
* -- "=" assignments, "==" comparisons and "+"
* concatenation between BSString and BSUString.
* See BSUniCodec on how to convert BSString's
* to BSUString's and back.
*
* However, "==" between two BSUString's works.
*
*@@added V0.9.18 (2002-03-08) [umoeller]
*/
class BSUString : public BSStringBase
{
public:
DECLARE_CLASS(BSUString);
/*
*@@ BSUString:
* default constructor to create an empty string.
*/
BSUString()
: BSStringBase(tBSUString)
{
}
/*
*@@ BSUString:
* default copy constructor;
* required for exception handling!
*/
BSUString(const BSUString &s)
: BSStringBase(s, tBSUString)
{
}
BSUString(BSUniCodec *pCodec,
const BSString &str);
BSUString(BSUniCodec *pCodec,
const char *psz);
/********************************************
*
* property queries
*
********************************************/
char operator[](const size_type ul) const;
/*
*@@ GetBuffer:
* like BSString::c_str(), but with
* a different name to make sure
* ustrings don't get passed to
* code which cannot handle that.
*/
inline const char* GetBuffer()
const
{
return ((_pBuf && _pBuf->_str.psz) ? _pBuf->_str.psz : "");
}
/********************************************
*
* assignment, modification
*
********************************************/
BSUString& assign(const BSUString &s);
BSUString& assignUtf8(const char *psz);
BSUString& assignUtf8(const char *p1, const char *p2);
BSUString& assignCP(BSUniCodec *pCodec,
const BSString &str);
BSUString& assignCP(BSUniCodec *pCodec,
const char *psz);
/*
*@@ operator=:
*
*/
inline BSUString& operator=(const BSUString &s)
{
return (assign(s));
}
BSUString& append(const BSUString &s);
BSUString& appendUtf8(const char *psz);
BSUString& appendCP(BSUniCodec *pCodec,
const BSString &str);
/*
*@@ operator+=:
* shortcut to append(BSUString &s).
*
*/
inline BSUString& operator+=(const BSUString &s) // in: string to append
{
return (append(s));
}
/********************************************
*
* comparison
*
********************************************/
inline int compare(const BSUString &s) const
{
return BSStringBase::compare(s);
}
inline int compareUtf8(const char *psz) const
{
return BSStringBase::compare(psz);
}
int compareI(const BSUString &s) const;
int compareUtf8I(const char *psz) const;
/********************************************
*
* find, replace
*
********************************************/
/*
*@@ _find_replace:
*
*/
inline size_type _find_replace(BSUString &strFind,
const BSUString &strReplaceWith,
size_type *pulPos)
{
return BSStringBase::_find_replace(strFind, strReplaceWith, pulPos);
}
/*
*@@ _find_replace:
*
*/
inline size_type _find_replace(const char *pszFind,
const BSUString &strReplaceWith,
size_type *pulPos)
{
return BSStringBase::_find_replace(pszFind, strReplaceWith, pulPos);
}
/*
*@@ _find_replace:
*
*/
inline size_type _find_replace(char cFind,
char cReplace,
size_type *pulPos)
{
return BSStringBase::_find_replace(cFind, cReplace, pulPos);
}
};
// Free operators for BSUString. These are declared for two BSUString
// operands only; no overloads taking a C string or a BSString are
// declared here. The comparisons return int, not bool; operator+
// returns a new BSUString by value.
int operator==(const BSUString &s1, const BSUString &s2);
int operator!=(const BSUString &s1, const BSUString &s2);
BSUString operator+(const BSUString &s1, const BSUString &s2);
/*
 *@@ ustring:
 *      typedef for BSUString, so that UTF-8
 *      strings can be written as "ustring".
 *
 *      This is a typedef at global scope, so it
 *      can conflict with any other "ustring"
 *      that is visible where this header is
 *      included.
 *
 *@@added V0.9.18 (2002-03-08) [umoeller]
 */
typedef BSUString ustring;
#endif