diff --git a/src/dsql/dsql.cpp b/src/dsql/dsql.cpp index 2beb3594acb..fe9dd4852dc 100644 --- a/src/dsql/dsql.cpp +++ b/src/dsql/dsql.cpp @@ -594,8 +594,6 @@ static RefPtr prepareStatement(thread_db* tdbb, dsql_dbb* databas const auto charSetId = database->dbb_attachment->att_charset; - string transformedText; - { // scope to delete parser before the scratch pool is gone Jrd::ContextPoolHolder scratchContext(tdbb, scratchPool); @@ -618,41 +616,16 @@ static RefPtr prepareStatement(thread_db* tdbb, dsql_dbb* databas if (parser.isStmtAmbiguous()) scratch->flags |= DsqlCompilerScratch::FLAG_AMBIGUOUS_STMT; - transformedText = parser.getTransformedString(); - } + const string& source = parser.getTransformedString(); + string transformedText(*scratchPool); - // If the attachment charset is NONE, replace non-ASCII characters by question marks, so - // that engine internals doesn't receive non-mappeable data to UTF8. If an attachment - // charset is used, validate the string. - if (charSetId == CS_NONE) - { - for (char* p = transformedText.begin(), *end = transformedText.end(); p < end; ++p) - { - if (UCHAR(*p) > 0x7F) - *p = '?'; - } + // If the attachment charset is NONE, we first try to convert data to UTF8; + // and if that fails, replace non-ASCII characters by question marks. + static_assert(CS_METADATA == CS_UTF8); + const bool isConverted = DataTypeUtil::convertToUTF8(source, transformedText, charSetId, ERRD_post); + dsqlStatement->setSqlText(FB_NEW_POOL(*statementPool) RefString(*statementPool, + isConverted ? 
transformedText : source)); } - else - { - CharSet* charSet = INTL_charset_lookup(tdbb, charSetId); - - if (!charSet->wellFormed(transformedText.length(), - (const UCHAR*) transformedText.begin(), NULL)) - { - ERRD_post(Arg::Gds(isc_sqlerr) << Arg::Num(-104) << - Arg::Gds(isc_malformed_string)); - } - - UCharBuffer temp; - - CsConvert conversor(charSet->getStruct(), - INTL_charset_lookup(tdbb, CS_METADATA)->getStruct()); - conversor.convert(transformedText.length(), (const UCHAR*) transformedText.c_str(), temp); - - transformedText.assign(temp.begin(), temp.getCount()); - } - - dsqlStatement->setSqlText(FB_NEW_POOL(*statementPool) RefString(*statementPool, transformedText)); // allocate the send and receive messages diff --git a/src/jrd/DataTypeUtil.cpp b/src/jrd/DataTypeUtil.cpp index 0199e78fc9d..1ef9b747e0d 100644 --- a/src/jrd/DataTypeUtil.cpp +++ b/src/jrd/DataTypeUtil.cpp @@ -219,7 +219,10 @@ ULONG DataTypeUtilBase::convertLength(ULONG len, CSetId srcCharSet, CSetId dstCh if (dstCharSet == CS_NONE || dstCharSet == CS_BINARY) return len; - return (len / maxBytesPerChar(srcCharSet)) * maxBytesPerChar(dstCharSet); + const ULONG srcBPC = maxBytesPerChar(srcCharSet); + const ULONG dstBPC = maxBytesPerChar(dstCharSet); + + return (ROUNDUP(len, srcBPC) / srcBPC) * dstBPC; } @@ -376,17 +379,9 @@ bool DataTypeUtil::convertToUTF8(const string& src, string& dst, CSetId charset, if (charset == CS_UTF8 || charset == CS_UNICODE_FSS) return false; - if (charset == CS_NONE) - { - const FB_SIZE_T length = src.length(); - - const char* s = src.c_str(); - char* p = dst.getBuffer(length); - - for (const char* end = src.end(); s < end; ++p, ++s) - *p = (*s < 0 ? '?' : *s); - } - else // charset != CS_UTF8 + // Conversion errors are raised as status_exception so that they can be caught below and the charset checked again. + // If charset is NONE, we fall back to substituting '?' characters; otherwise we re-throw the error through err(). 
+ try { DataTypeUtil dtUtil(tdbb); ULONG length = dtUtil.convertLength(src.length(), charset, CS_UTF8); @@ -394,10 +389,27 @@ bool DataTypeUtil::convertToUTF8(const string& src, string& dst, CSetId charset, length = INTL_convert_bytes(tdbb, CS_UTF8, (UCHAR*) dst.getBuffer(length), length, charset, (const BYTE*) src.begin(), src.length(), - err); + status_exception::raise); dst.resize(length); } + catch (const status_exception& ex) + { + const Arg::StatusVector v(ex); + + if (charset == CS_NONE) + { + const FB_SIZE_T length = src.length(); + + const char* s = src.c_str(); + char* p = dst.getBuffer(length); + + for (const char* end = src.end(); s < end; ++p, ++s) + *p = (*s < ASCII_SPACE ? '?' : *s); + } + else + err(v); + } return true; } diff --git a/src/jrd/intl.h b/src/jrd/intl.h index cc1d7e93a9e..63f61a2d256 100644 --- a/src/jrd/intl.h +++ b/src/jrd/intl.h @@ -35,8 +35,8 @@ struct IdStorage constexpr explicit IdStorage(USHORT id) : val(id) { } constexpr operator USHORT() const { return val; } - bool operator==(const IdStorage& id) const { return val == id.val; } - bool operator!=(const IdStorage& id) const { return val != id.val; } + constexpr bool operator==(const IdStorage& id) const { return val == id.val; } + constexpr bool operator!=(const IdStorage& id) const { return val != id.val; } private: USHORT val;