Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change SString::GetUTF8NoConvert to GetUTF8 that converts the SString #71101

Merged
merged 8 commits into from
Jun 23, 2022
2 changes: 1 addition & 1 deletion src/coreclr/debug/daccess/daccess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2501,7 +2501,7 @@ namespace serialization { namespace bin {
return ErrOverflow;
}

memcpy_s(dest, destSize, s.GetUTF8NoConvert(), cnt);
memcpy_s(dest, destSize, s.GetUTF8(), cnt);

return cnt;
}
Expand Down
14 changes: 7 additions & 7 deletions src/coreclr/inc/sstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ class EMPTY_BASES_DECL SString : private SBuffer
void SetASCII(const ASCII *string);
void SetUTF8(const UTF8 *string);
void SetANSI(const ANSI *string);
void SetAndConvertToUTF8(const WCHAR* string);

// Set this string to a copy of the first count chars of the given string
void Set(const WCHAR *string, COUNT_T count);
Expand Down Expand Up @@ -492,17 +493,15 @@ class EMPTY_BASES_DECL SString : private SBuffer
// SString *s = ...;
// {
// StackScratchBuffer buffer;
// const UTF8 *utf8 = s->GetUTF8(buffer);
// CallFoo(utf8);
// const ANSI *ansi = s->GetANSI(buffer);
// CallFoo(ansi);
// }
// // No more pointers to returned buffer allowed.

const UTF8 *GetUTF8(AbstractScratchBuffer &scratch) const;
const UTF8 *GetUTF8(AbstractScratchBuffer &scratch, COUNT_T *pcbUtf8) const;
const ANSI *GetANSI(AbstractScratchBuffer &scratch) const;

// Used when the representation is known, throws if the representation doesn't match
const UTF8 *GetUTF8NoConvert() const;
// You can always get a UTF8 string. This will force a conversion
// if necessary.
const UTF8 *GetUTF8() const;

// Converts/copies into the given output string
void ConvertToUnicode(SString &dest) const;
Expand Down Expand Up @@ -727,6 +726,7 @@ class EMPTY_BASES_DECL SString : private SBuffer
void ConvertASCIIToUnicode(SString &dest) const;
void ConvertToUnicode() const;
void ConvertToUnicode(const CIterator &i) const;
void ConvertToUTF8() const;

const SString &GetCompatibleString(const SString &s, SString &scratch) const;
const SString &GetCompatibleString(const SString &s, SString &scratch, const CIterator &i) const;
Expand Down
19 changes: 19 additions & 0 deletions src/coreclr/inc/sstring.inl
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,25 @@ inline const WCHAR *SString::GetUnicode() const
SS_RETURN GetRawUnicode();
}

// Get a const pointer to the internal buffer as a UTF8 string.
//
// ConvertToUTF8() is invoked first, so a string that is not already in the
// UTF8 representation is converted before its raw buffer is returned.
// Per the contract below, the call can only throw when such a conversion
// is required.
inline const UTF8 *SString::GetUTF8() const
{
    SS_CONTRACT(const UTF8 *)
    {
        GC_NOTRIGGER;
        PRECONDITION(CheckPointer(this));
        SS_POSTCONDITION(CheckPointer(RETVAL));
        // Already-UTF8 strings need no conversion, hence cannot throw.
        if (IsRepresentation(REPRESENTATION_UTF8)) NOTHROW; else THROWS;
        SUPPORTS_DAC;
    }
    SS_CONTRACT_END;

    // No-op when the representation is already UTF8.
    ConvertToUTF8();

    SS_RETURN GetRawUTF8();
}

// Normalize the string to unicode. This will make many operations nonfailing.
inline void SString::Normalize() const
{
Expand Down
8 changes: 2 additions & 6 deletions src/coreclr/utilcode/clrconfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,15 +200,11 @@ namespace
#if defined(DEBUG) && !defined(SELF_NO_HOST)
// Validate the cache and no-cache logic result in the same answer
SString nameToConvert(name);
SString nameAsUTF8;
nameToConvert.ConvertToUTF8(nameAsUTF8);
SString valueAsUTF8;
temp.ConvertToUTF8(valueAsUTF8);

CLRConfigNoCache nonCache = CLRConfigNoCache::Get(nameAsUTF8.GetUTF8NoConvert(), noPrefix);
CLRConfigNoCache nonCache = CLRConfigNoCache::Get(nameToConvert.GetUTF8(), noPrefix);
LPCSTR valueNoCache = nonCache.AsString();

_ASSERTE(SString::_stricmp(valueNoCache, valueAsUTF8.GetUTF8NoConvert()) == 0);
_ASSERTE(SString::_stricmp(valueNoCache, temp.GetUTF8()) == 0);
#endif // defined(DEBUG) && !defined(SELF_NO_HOST)
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/utilcode/debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ bool _DbgBreakCheck(
" Image: %s\n\n",
GetCurrentProcessId(), GetCurrentProcessId(),
GetCurrentThreadId(), GetCurrentThreadId(),
szExpr, szFile, iLine, modulePath.GetUTF8NoConvert());
szExpr, szFile, iLine, modulePath.GetUTF8());

formattedMessages = TRUE;
}
Expand Down Expand Up @@ -680,11 +680,11 @@ void DECLSPEC_NORETURN __FreeBuildAssertFail(const char *szFile, int iLine, cons
" File: %s, Line: %d Image:\n%s\n",
GetCurrentProcessId(), GetCurrentProcessId(),
GetCurrentThreadId(), GetCurrentThreadId(),
szExpr, szFile, iLine, modulePath.GetUTF8NoConvert());
OutputDebugStringUtf8(buffer.GetUTF8NoConvert());
szExpr, szFile, iLine, modulePath.GetUTF8());
OutputDebugStringUtf8(buffer.GetUTF8());

// Write out the error to the console
printf(buffer.GetUTF8NoConvert());
printf(buffer.GetUTF8());

// Log to the stress log. Note that we can't include the szExpr b/c that
// may not be a string literal (particularly for formattable asserts).
Expand Down
116 changes: 56 additions & 60 deletions src/coreclr/utilcode/sstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,29 @@ void SString::SetANSI(const ANSI *string, COUNT_T count)
SS_RETURN;
}

//-----------------------------------------------------------------------------
// Set this string to a copy of the given UTF16 string transcoded to UTF8
//-----------------------------------------------------------------------------
void SString::SetAndConvertToUTF8(const WCHAR *string)
{
SS_CONTRACT_VOID
{
// !!! Check for illegal UTF8 encoding?
INSTANCE_CHECK;
PRECONDITION(CheckPointer(string, NULL_OK));
THROWS;
GC_NOTRIGGER;
SUPPORTS_DAC_HOST_ONLY;
}
SS_CONTRACT_END;

SString utf16Str(Literal, string);

utf16Str.ConvertToUTF8(*this);

SS_RETURN;
}

//-----------------------------------------------------------------------------
// Set this string to the given unicode character
//-----------------------------------------------------------------------------
Expand Down Expand Up @@ -777,6 +800,39 @@ void SString::ConvertToUnicode(const CIterator &i) const
RETURN;
}

//-----------------------------------------------------------------------------
// Convert the internal representation for this String to UTF8.
//-----------------------------------------------------------------------------
void SString::ConvertToUTF8() const
{
CONTRACT_VOID
{
POSTCONDITION(IsRepresentation(REPRESENTATION_UTF8));
if (IsRepresentation(REPRESENTATION_UTF8)) NOTHROW; else THROWS;
GC_NOTRIGGER;
SUPPORTS_DAC_HOST_ONLY;
}
CONTRACT_END;

if (!IsRepresentation(REPRESENTATION_UTF8))
{
if (IsRepresentation(REPRESENTATION_ASCII))
{
// ASCII is a subset of UTF8, so we can just set the representation.
(const_cast<SString*>(this))->SetRepresentation(REPRESENTATION_UTF8);
}
else
{
StackSString s;
ConvertToUTF8(s);
PREFIX_ASSUME(!s.IsImmutable());
(const_cast<SString*>(this))->Set(s);
}
}

RETURN;
}

//-----------------------------------------------------------------------------
// Set s to be a copy of this string's contents, but in the unicode format.
//-----------------------------------------------------------------------------
Expand Down Expand Up @@ -1787,66 +1843,6 @@ const CHAR *SString::GetANSI(AbstractScratchBuffer &scratch) const
SS_RETURN ((SString&)scratch).GetRawANSI();
}

//-----------------------------------------------------------------------------
// Return a const UTF8 view of this string.
//
// When the string is already UTF8 the internal buffer is returned and
// 'scratch' is left untouched. Otherwise the contents are converted into
// 'scratch' and the returned pointer refers to the scratch buffer.
//-----------------------------------------------------------------------------
const UTF8 *SString::GetUTF8(AbstractScratchBuffer &scratch) const
{
    CONTRACT(const UTF8 *)
    {
        INSTANCE_CHECK_NULL;
        THROWS;
        GC_NOTRIGGER;
    }
    CONTRACT_END;

    if (!IsRepresentation(REPRESENTATION_UTF8))
    {
        // Conversion path: the result lives in the caller's scratch buffer.
        SString &converted = (SString&)scratch;
        ConvertToUTF8(converted);
        RETURN converted.GetRawUTF8();
    }

    RETURN GetRawUTF8();
}

// Return a const UTF8 view of this string and report its size via pcbUtf8.
//
// Already-UTF8 strings return the internal buffer and store
// GetRawCount() + 1 in *pcbUtf8 (presumably the count including the
// terminator - confirm). Otherwise the contents are converted into 'scratch',
// *pcbUtf8 receives the value returned by ConvertToUTF8, and the returned
// pointer refers to the scratch buffer.
// pcbUtf8 is dereferenced unconditionally and must not be NULL.
const UTF8 *SString::GetUTF8(AbstractScratchBuffer &scratch, COUNT_T *pcbUtf8) const
{
    CONTRACT(const UTF8 *)
    {
        INSTANCE_CHECK_NULL;
        THROWS;
        GC_NOTRIGGER;
    }
    CONTRACT_END;

    if (!IsRepresentation(REPRESENTATION_UTF8))
    {
        // Conversion path: the result lives in the caller's scratch buffer.
        SString &converted = (SString&)scratch;
        *pcbUtf8 = ConvertToUTF8(converted);
        RETURN converted.GetRawUTF8();
    }

    *pcbUtf8 = GetRawCount() + 1;
    RETURN GetRawUTF8();
}

//-----------------------------------------------------------------------------
// Return a const pointer to the internal buffer, which must already hold a
// UTF8 string. No conversion is attempted, so no scratch buffer is needed;
// a string in any other representation results in ThrowHR(E_INVALIDARG).
//-----------------------------------------------------------------------------
const UTF8 *SString::GetUTF8NoConvert() const
{
    CONTRACT(const UTF8 *)
    {
        INSTANCE_CHECK_NULL;
        THROWS;
        GC_NOTRIGGER;
    }
    CONTRACT_END;

    // Reject anything that is not already UTF8.
    if (!IsRepresentation(REPRESENTATION_UTF8))
    {
        ThrowHR(E_INVALIDARG);
    }

    RETURN GetRawUTF8();
}

//-----------------------------------------------------------------------------
// Safe version of sprintf.
// Prints formatted ansi text w/ var args to this buffer.
Expand Down
9 changes: 3 additions & 6 deletions src/coreclr/vm/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,7 @@ MethodTable* Module::CreateArrayMethodTable(TypeHandle elemTypeHnd, CorElementTy
StackSString ssElemName;
elemTypeHnd.GetName(ssElemName);

StackScratchBuffer scratch;
jkoritzinsky marked this conversation as resolved.
Show resolved Hide resolved
elemTypeHnd.GetAssembly()->ThrowTypeLoadException(ssElemName.GetUTF8(scratch), IDS_CLASSLOAD_VALUECLASSTOOLARGE);
elemTypeHnd.GetAssembly()->ThrowTypeLoadException(ssElemName.GetUTF8(), IDS_CLASSLOAD_VALUECLASSTOOLARGE);
}
}

Expand Down Expand Up @@ -510,8 +509,7 @@ MethodTable* Module::CreateArrayMethodTable(TypeHandle elemTypeHnd, CorElementTy
#ifdef _DEBUG
StackSString debugName;
TypeString::AppendType(debugName, TypeHandle(pMT));
StackScratchBuffer buff;
const char* pDebugNameUTF8 = debugName.GetUTF8(buff);
const char* pDebugNameUTF8 = debugName.GetUTF8();
S_SIZE_T safeLen = S_SIZE_T(strlen(pDebugNameUTF8))+S_SIZE_T(1);
if(safeLen.IsOverflow()) COMPlusThrowHR(COR_E_OVERFLOW);
size_t len = safeLen.Value();
Expand Down Expand Up @@ -657,8 +655,7 @@ MethodTable* Module::CreateArrayMethodTable(TypeHandle elemTypeHnd, CorElementTy
StackSString ssElemName;
elemTypeHnd.GetName(ssElemName);

StackScratchBuffer scratch;
elemTypeHnd.GetAssembly()->ThrowTypeLoadException(ssElemName.GetUTF8(scratch),
elemTypeHnd.GetAssembly()->ThrowTypeLoadException(ssElemName.GetUTF8(),
IDS_CLASSLOAD_VALUECLASSTOOLARGE);
}

Expand Down
3 changes: 1 addition & 2 deletions src/coreclr/vm/assembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -993,8 +993,7 @@ Module *Assembly::FindModuleByName(LPCSTR pszModuleName)
SString moduleName(SString::Utf8, pszModuleName);
moduleName.LowerCase();

StackScratchBuffer buffer;
pszModuleName = moduleName.GetUTF8(buffer);
pszModuleName = moduleName.GetUTF8();

mdFile kFile = GetManifestFileToken(pszModuleName);
if (kFile == mdTokenNil)
Expand Down
21 changes: 10 additions & 11 deletions src/coreclr/vm/assemblynative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ extern "C" void QCALLTYPE AssemblyNative_InternalLoad(NativeAssemblyNameParts* p
COMPlusThrow(kArgumentException, W("Format_StringZeroLength"));

StackSString ssName;
SString(SString::Literal, pAssemblyNameParts->_pName).ConvertToUTF8(ssName);
ssName.SetAndConvertToUTF8(pAssemblyNameParts->_pName);

AssemblyMetaDataInternal asmInfo;

Expand All @@ -87,11 +87,11 @@ extern "C" void QCALLTYPE AssemblyNative_InternalLoad(NativeAssemblyNameParts* p

SmallStackSString ssLocale;
if (pAssemblyNameParts->_pCultureName != NULL)
SString(SString::Literal, pAssemblyNameParts->_pCultureName).ConvertToUTF8(ssLocale);
asmInfo.szLocale = (pAssemblyNameParts->_pCultureName != NULL) ? ssLocale.GetUTF8NoConvert() : NULL;
ssLocale.SetAndConvertToUTF8(pAssemblyNameParts->_pCultureName);
asmInfo.szLocale = (pAssemblyNameParts->_pCultureName != NULL) ? ssLocale.GetUTF8() : NULL;

// Initialize spec
spec.Init(ssName.GetUTF8NoConvert(), &asmInfo,
spec.Init(ssName.GetUTF8(), &asmInfo,
pAssemblyNameParts->_pPublicKeyOrToken, pAssemblyNameParts->_cbPublicKeyOrToken, pAssemblyNameParts->_flags);

if (pParentAssembly != NULL)
Expand Down Expand Up @@ -541,10 +541,10 @@ extern "C" BYTE * QCALLTYPE AssemblyNative_GetResource(QCall::AssemblyHandle pAs
COMPlusThrow(kArgumentNullException, W("ArgumentNull_String"));

// Get the name in UTF8
SString name(SString::Literal, wszName);
StackSString name;
name.SetAndConvertToUTF8(wszName);

StackScratchBuffer scratch;
LPCUTF8 pNameUTF8 = name.GetUTF8(scratch);
LPCUTF8 pNameUTF8 = name.GetUTF8();

if (*pNameUTF8 == '\0')
COMPlusThrow(kArgumentException, W("Format_StringZeroLength"));
Expand All @@ -571,10 +571,9 @@ extern "C" INT32 QCALLTYPE AssemblyNative_GetManifestResourceInfo(QCall::Assembl
COMPlusThrow(kArgumentNullException, W("ArgumentNull_String"));

// Get the name in UTF8
SString name(SString::Literal, wszName);

StackScratchBuffer scratch;
LPCUTF8 pNameUTF8 = name.GetUTF8(scratch);
StackSString name;
name.SetAndConvertToUTF8(wszName);
LPCUTF8 pNameUTF8 = name.GetUTF8();

if (*pNameUTF8 == '\0')
COMPlusThrow(kArgumentException, W("Format_StringZeroLength"));
Expand Down
18 changes: 14 additions & 4 deletions src/coreclr/vm/assemblyspec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,13 +275,23 @@ void AssemblySpec::InitializeAssemblyNameRef(_In_ BINDER_SPACE::AssemblyName* as
AssemblySpec spec;
spec.InitializeWithAssemblyIdentity(assemblyName);

StackScratchBuffer nameBuffer;
spec.SetName(assemblyName->GetSimpleName().GetUTF8(nameBuffer));
StackSString nameBuffer;
nameBuffer.SetAndConvertToUTF8(assemblyName->GetSimpleName().GetUnicode());
spec.SetName(nameBuffer.GetUTF8());

StackScratchBuffer cultureBuffer;
StackSString cultureBuffer;
if (assemblyName->Have(BINDER_SPACE::AssemblyIdentity::IDENTITY_FLAG_CULTURE))
{
LPCSTR culture = assemblyName->IsNeutralCulture() ? "" : assemblyName->GetCulture().GetUTF8(cultureBuffer);
LPCSTR culture;
if (assemblyName->IsNeutralCulture())
{
culture = "";
}
else
{
cultureBuffer.SetAndConvertToUTF8(assemblyName->GetCulture().GetUnicode());
culture = cultureBuffer.GetUTF8();
}
spec.SetCulture(culture);
}

Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/vm/bundle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,9 @@ BundleFileLocation Bundle::Probe(const SString& path, bool pathIsBundleRelative)
// Bundle.Probe("path/to/exe/lib.dll") => m_probe("lib.dll")
// Bundle.Probe("path/to/exe/and/some/more/lib.dll") => m_probe("and/some/more/lib.dll")

StackScratchBuffer scratchBuffer;
LPCSTR utf8Path(path.GetUTF8(scratchBuffer));
StackSString pathBuffer;
pathBuffer.SetAndConvertToUTF8(path.GetUnicode());
LPCSTR utf8Path(pathBuffer.GetUTF8());

if (!pathIsBundleRelative)
{
Expand Down
Loading