diff --git a/src/mlang-charset.cpp b/src/mlang-charset.cpp index 1e9eea1..c4cd1fa 100644 --- a/src/mlang-charset.cpp +++ b/src/mlang-charset.cpp @@ -1,136 +1,188 @@ /* @file mlang-charset.cpp * @brief Convert between charsets using Mlang * * Copyright (C) 2015 by Bundesamt für Sicherheit in der Informationstechnik * Software engineering by Intevation GmbH * * This file is part of GpgOL. * * GpgOL is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * GpgOL is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, see . */ #include "config.h" #include "common.h" #define INITGUID #include DEFINE_GUID (IID_IMultiLanguage, 0x275c23e1,0x3747,0x11d0,0x9f, 0xea,0x00,0xaa,0x00,0x3f,0x86,0x46); #include #undef INITGUID #include "dialogs.h" #include "dispcache.h" #include "mlang-charset.h" +static char * +iconv_to_utf8 (const char *charset, const char *input, size_t inlen) +{ + if (!charset || !input) + { + STRANGEPOINT; + return nullptr; + } + + gpgrt_w32_iconv_t ctx = gpgrt_w32_iconv_open ("UTF-8", charset); + if (!ctx || ctx == (gpgrt_w32_iconv_t)-1) + { + log_debug ("%s:%s: Failed to open iconv ctx for '%s'", + SRCNAME, __func__, charset); + return nullptr; + } + + size_t len = 0; + + for (const unsigned char *s = (const unsigned char*) input; *s; s++) + { + len++; + if ((*s & 0x80)) + { + len += 5; /* We may need up to 6 bytes for the utf8 output. */ + } + } + + char *buffer = (char*) xmalloc (len + 1); + char *outptr = buffer; + size_t outbytes = len; + size_t ret = gpgrt_w32_iconv (ctx, (const char **)&input, &inlen, + &outptr, &outbytes); + gpgrt_w32_iconv_close (ctx); + if (ret == -1) + { + log_error ("%s:%s: Conversion failed for '%s'", + SRCNAME, __func__, charset); + xfree (buffer); + return nullptr; + } + return buffer; +} + char *ansi_charset_to_utf8 (const char *charset, const char *input, size_t inlen, int codepage) { LPMULTILANGUAGE multilang = NULL; MIMECSETINFO mime_info; HRESULT err; DWORD enc; DWORD mode = 0; unsigned int wlen = 0, uinlen = 0; wchar_t *buf; char *ret; if ((!charset || !strlen (charset)) && !codepage) { log_debug ("%s:%s: No charset / codepage returning plain.", SRCNAME, __func__); return xstrdup (input); } auto cache = DispCache::instance (); LPDISPATCH cachedLang = cache->getDisp (DISPID_MLANG_CHARSET); if (!cachedLang) { CoCreateInstance(CLSID_CMultiLanguage, NULL, CLSCTX_INPROC_SERVER, IID_IMultiLanguage, (void**)&multilang); memdbg_addRef (multilang); cache->addDisp (DISPID_MLANG_CHARSET, (LPDISPATCH) multilang); } else { multilang = (LPMULTILANGUAGE) cachedLang; } if (!multilang) { log_error ("%s:%s: Failed to get multilang obj.", SRCNAME, __func__); return NULL; } if (inlen > UINT_MAX) { log_error ("%s:%s: Inlen too long. Bug.", SRCNAME, __func__); return NULL; } uinlen = (unsigned int) inlen; if (!codepage) { mime_info.uiCodePage = 0; mime_info.uiInternetEncoding = 0; BSTR w_charset = utf8_to_wchar (charset); err = multilang->GetCharsetInfo (w_charset, &mime_info); xfree (w_charset); if (err != S_OK) { - log_error ("%s:%s: Failed to find charset for: %s", + log_debug ("%s:%s: Failed to find charset for: %s fallback to iconv", SRCNAME, __func__, charset); + /* We only use this as a fallback as the old code was older and + known to work in most cases. */ + ret = iconv_to_utf8 (charset, input, inlen); + if (ret) + { + return ret; + } + return xstrdup (input); } enc = (mime_info.uiInternetEncoding == 0) ? mime_info.uiCodePage : mime_info.uiInternetEncoding; } else { enc = codepage; } /** Get the size of the result */ err = multilang->ConvertStringToUnicode(&mode, enc, const_cast(input), &uinlen, NULL, &wlen); if (FAILED (err)) { log_error ("%s:%s: Failed conversion.", SRCNAME, __func__); return NULL; } buf = (wchar_t*) xmalloc(sizeof(wchar_t) * (wlen + 1)); err = multilang->ConvertStringToUnicode(&mode, enc, const_cast(input), &uinlen, buf, &wlen); if (FAILED (err)) { log_error ("%s:%s: Failed conversion 2.", SRCNAME, __func__); xfree (buf); return NULL; } /* Doc is not clear if this is terminated. */ buf[wlen] = L'\0'; ret = wchar_to_utf8 (buf); xfree (buf); return ret; }