When using a real multi-byte character set, the conversion from wide character strings to multi-byte character strings is not correctly implemented, e.g. in the “String::operator const char *”.
This is because the conversion is based on the assumption that the length in bytes of the multi-byte string will be the same as the string length of the wide character string. This does not hold when a non-ASCII character is actually represented by more than one byte. The resulting multi-byte string is truncated and too short.
I have that situation especially on Linux (Ubuntu), where I want to use German umlauts (you have to call setlocale(LC_CTYPE, "");
at program startup to be able to use umlauts etc.), because the character set in the locale is a UTF-8 character set. This means that umlauts are represented as 2 bytes. And I have to pass my strings to other modules, which do not support wide characters, so I have to convert them.
But this would also be true, if you have a real multi-byte character set on other platforms.
To fix that, I added the following functions in the class “CharacterFunctions” to get the target length before the conversion:
/** Returns the number of wide characters the multi-byte string s converts to,
    not counting the terminating null. The value comes from mbstowcs() called
    with a null destination, so nothing is actually converted.
*/
static int lengthConverted (const char* const s) throw();
/** Returns the number of bytes the wide-character string s converts to as a
    multi-byte string, not counting the terminating null. The value comes from
    wcstombs() called with a null destination, so nothing is actually converted.
*/
static int lengthConverted (const juce_wchar* const s) throw();
...
/** Returns how many wide characters are needed to hold the multi-byte string s
    after conversion with the current C locale, not counting the terminating null.

    @param s    a null-terminated multi-byte string; a null pointer is treated
                as an empty string
    @returns    the converted length in wide characters, or 0 if s is null or
                contains a byte sequence that is invalid in the current locale
*/
int CharacterFunctions::lengthConverted (const char* const s) throw()
{
    if (s == 0)
        return 0;

    // With a null destination mbstowcs() only counts; nothing is written.
    const size_t numWideChars = mbstowcs (0, s, 0);

    // mbstowcs() signals an invalid sequence with (size_t) -1. Casting that
    // straight to int would hand callers a length of -1, which they would use
    // as an allocation size and as an array index.
    if (numWideChars == (size_t) -1)
        return 0;

    return (int) numWideChars;
}
/** Returns how many bytes are needed to hold the wide-character string s after
    conversion to a multi-byte string with the current C locale, not counting
    the terminating null.

    @param s    a null-terminated wide-character string; a null pointer is
                treated as an empty string
    @returns    the converted length in bytes, or 0 if s is null or contains a
                character that cannot be represented in the current locale
*/
int CharacterFunctions::lengthConverted (const juce_wchar* const s) throw()
{
    if (s == 0)
        return 0;

    // With a null destination wcstombs() only counts; nothing is written.
    const size_t numBytes = wcstombs (0, s, 0);

    // wcstombs() signals an unconvertible character with (size_t) -1. Casting
    // that straight to int would hand callers a length of -1, which they would
    // use as an allocation size and as an array index.
    if (numBytes == (size_t) -1)
        return 0;

    return (int) numBytes;
}
and I changed the following functions in the class “String”. To be symmetrical, I also changed the conversion functions from multi-byte strings to wide character strings, although the only problem in that case might be that some unneeded memory is allocated.
/** Creates a string from a null-terminated 8-bit / multi-byte string.

    In Unicode builds the storage is sized from the converted (wide-character)
    length rather than from the byte length of t. If t is null, empty, or (in
    Unicode builds) cannot be converted in the current locale, the string
    refers to the shared empty string.

    @param t    the source text, or null
*/
String::String (const char* const t) throw()
{
    if (t != 0 && *t != 0)
    {
#if JUCE_STRINGS_ARE_UNICODE
        const int len = CharacterFunctions::lengthConverted (t);

        // A non-positive length for non-empty input means the conversion
        // failed (invalid byte sequence for the locale). Allocating and
        // copying with that length would be unsafe, so fall through to the
        // empty string instead.
        if (len > 0)
        {
            createInternal (len);
            CharacterFunctions::copy (text->text, t, len + 1); // + 1 for the terminator
            return;
        }
#else
        const int len = CharacterFunctions::length (t);
        createInternal (len);
        memcpy (text->text, t, len + 1); // + 1 copies the terminator too
        return;
#endif
    }

    text = &emptyString;
    emptyString.refCount = safeEmptyStringRefCount;
}
/** Creates a string from a null-terminated wide-character string.

    In non-Unicode builds the storage is sized from the converted (multi-byte)
    length rather than from the number of wide characters in t. If t is null,
    empty, or (in non-Unicode builds) cannot be converted in the current
    locale, the string refers to the shared empty string.

    @param t    the source text, or null
*/
String::String (const juce_wchar* const t) throw()
{
    if (t != 0 && *t != 0)
    {
#if JUCE_STRINGS_ARE_UNICODE
        const int len = CharacterFunctions::length (t);
        createInternal (len);
        memcpy (text->text, t, (len + 1) * sizeof (tchar)); // + 1 copies the terminator too
        return;
#else
        const int len = CharacterFunctions::lengthConverted (t);

        // A non-positive length for non-empty input means the conversion
        // failed (a character the locale cannot represent). Allocating and
        // copying with that length would be unsafe, so fall through to the
        // empty string instead.
        if (len > 0)
        {
            createInternal (len);
            CharacterFunctions::copy (text->text, t, len + 1); // + 1 for the terminator
            return;
        }
#endif
    }

    text = &emptyString;
    emptyString.refCount = safeEmptyStringRefCount;
}
....
#if JUCE_STRINGS_ARE_UNICODE
/** Returns a multi-byte copy of this (wide-character) string.

    The copy is stored in the same heap block as the wide text, directly after
    the wide text's terminator, so the block is reallocated on every call. The
    space reserved for the copy is the converted byte length, not the number of
    wide characters, so characters that need more than one byte are not cut off.

    If the text cannot be converted in the current locale, an empty multi-byte
    string is returned.
*/
String::operator const char*() const throw()
{
    if (isEmpty())
        return (const char*) emptyCharString;

    String* const mutableThis = const_cast <String*> (this);
    mutableThis->dupeInternalIfMultiplyReferenced();

    // Wide characters occupied by the primary text, including its terminator.
    const int numWideChars = CharacterFunctions::length (text->text) + 1;

    // Bytes needed for the converted copy, excluding its terminator. A negative
    // value means the length query failed; treat that as "nothing to convert"
    // so it is never used as a copy count or as the terminator's index.
    int numBytes = CharacterFunctions::lengthConverted (text->text);

    if (numBytes < 0)
        numBytes = 0;

    mutableThis->text = (InternalRefCountedStringHolder*)
                            juce_realloc (text, sizeof (InternalRefCountedStringHolder)
                                                  + (numWideChars * sizeof (juce_wchar) + numBytes + 1));

    char* const otherCopy = (char*) (text->text + numWideChars);

    CharacterFunctions::copy (otherCopy, text->text, numBytes);
    otherCopy [numBytes] = 0;

    return otherCopy;
}
#else
/** Returns a wide-character copy of this (8-bit / multi-byte) string.

    The copy is stored in the same heap block as the 8-bit text, after that
    text's terminator, so the block is reallocated on every call. The space
    reserved for the copy is the converted wide-character length, not the byte
    length.

    If the text cannot be converted in the current locale, an empty wide string
    is returned.
*/
String::operator const juce_wchar*() const throw()
{
    if (isEmpty())
        return (const juce_wchar*) emptyCharString;

    String* const mutableThis = const_cast <String*> (this);
    mutableThis->dupeInternalIfMultiplyReferenced();

    // Bytes occupied by the primary text, including its terminator.
    const int numBytes = CharacterFunctions::length (text->text) + 1;

    // Round the byte offset of the wide copy up to a multiple of
    // sizeof (juce_wchar): an arbitrary byte offset would make the returned
    // pointer misaligned for wide-character access.
    // NOTE(review): assumes text->text itself is aligned for juce_wchar - confirm
    // against the layout of InternalRefCountedStringHolder.
    const int wideCharSize = (int) sizeof (juce_wchar);
    const int wideCopyOffset = ((numBytes + wideCharSize - 1) / wideCharSize) * wideCharSize;

    // Wide characters needed for the converted copy, excluding its terminator.
    // A negative value means the length query failed; treat that as "nothing
    // to convert" so it is never used as a copy count or terminator index.
    int numWideChars = CharacterFunctions::lengthConverted (text->text);

    if (numWideChars < 0)
        numWideChars = 0;

    mutableThis->text = (InternalRefCountedStringHolder*)
                            juce_realloc (text, sizeof (InternalRefCountedStringHolder)
                                                  + ((numWideChars + 1) * sizeof (juce_wchar) + wideCopyOffset));

    juce_wchar* const otherCopy = (juce_wchar*) (text->text + wideCopyOffset);

    CharacterFunctions::copy (otherCopy, text->text, numWideChars);
    otherCopy [numWideChars] = 0;

    return otherCopy;
}
#endif
/** Copies this string into a caller-supplied 8-bit buffer and null-terminates it.

    In Unicode builds the text is converted to a multi-byte string and the limit
    is applied to the converted length in bytes.

    @param destBuffer       receives the text; must have room for
                            maxCharsToCopy + 1 elements, because the terminator
                            is written at index min (maxCharsToCopy, length)
    @param maxCharsToCopy   maximum number of chars to write, not counting the
                            terminator; must not be negative
*/
void String::copyToBuffer (char* const destBuffer,
                           const int maxCharsToCopy) const throw()
{
#if JUCE_STRINGS_ARE_UNICODE
    // A negative converted length means the conversion failed; treat it as
    // empty so that it is never used as the terminator's index.
    int numBytes = CharacterFunctions::lengthConverted (text->text);

    if (numBytes < 0)
        numBytes = 0;

    const int len = jmin (maxCharsToCopy, numBytes);

    if (len > 0)
    {
        // copy() is presumably a thin wrapper around wcstombs(), which stops
        // before a character that would not fit completely into len bytes.
        // Zeroing the range first means such a truncation ends the result
        // after the last whole character, instead of leaving uninitialised
        // bytes in front of the terminator written below.
        memset (destBuffer, 0, len);
        CharacterFunctions::copy (destBuffer, text->text, len);
    }
#else
    const int len = jmin (maxCharsToCopy, length());
    memcpy (destBuffer, text->text, len * sizeof (tchar));
#endif

    destBuffer [len] = 0;
}
/** Copies this string into a caller-supplied wide-character buffer and
    null-terminates it.

    In non-Unicode builds the text is converted to wide characters and the limit
    is applied to the converted length in wide characters.

    @param destBuffer       receives the text; must have room for
                            maxCharsToCopy + 1 elements, because the terminator
                            is written at index min (maxCharsToCopy, length)
    @param maxCharsToCopy   maximum number of wide characters to write, not
                            counting the terminator; must not be negative
*/
void String::copyToBuffer (juce_wchar* const destBuffer,
                           const int maxCharsToCopy) const throw()
{
#if JUCE_STRINGS_ARE_UNICODE
    const int len = jmin (maxCharsToCopy, length());
    memcpy (destBuffer, text->text, len * sizeof (juce_wchar));
#else
    // A negative converted length means the conversion failed; treat it as
    // empty so that it is never used as a copy count or as the terminator's
    // index.
    int numWideChars = CharacterFunctions::lengthConverted (text->text);

    if (numWideChars < 0)
        numWideChars = 0;

    const int len = jmin (maxCharsToCopy, numWideChars);
    CharacterFunctions::copy (destBuffer, text->text, len);
#endif

    destBuffer [len] = 0;
}
Also the constructors String::String (const char* const t,
const int maxChars) throw();
String::String (const juce_wchar* const t,
const int maxChars) throw();
need to be changed. I did not do that yet because, especially for the first one, one has to be very careful about what maxChars really means. Does it mean the number of bytes? In the second case it should be straightforward.
Regards,
Andreas