GNOME Bugzilla – Bug 647140
Add tags encoding detect via enca
Last modified: 2011-05-18 20:20:43 UTC
From 38b9889266767969ed5d91c455c2b8611bf67a44 Mon Sep 17 00:00:00 2001 From: Heiher <admin@heiher.info> Date: Fri, 8 Apr 2011 15:48:36 +0800 Subject: [PATCH] Add tags encoding detect via enca --- configure.ac | 13 +++++ gst-libs/gst/tag/Makefile.am | 4 +- gst-libs/gst/tag/tags.c | 100 +++++++++++++++++++++++++++++------------- 3 files changed, 84 insertions(+), 33 deletions(-) diff --git a/configure.ac b/configure.ac index 3325abc..d64030c 100644 --- a/configure.ac +++ b/configure.ac @@ -716,6 +716,18 @@ AG_GST_CHECK_FEATURE(IVORBIS, [integer vorbis plug-in], ivorbisdec, [ fi ]) +dnl *** tags prefer to have enca *** +translit(dnm, m, l) AM_CONDITIONAL(USE_ENCA, true) +AG_GST_CHECK_FEATURE(ENCA, [enca support for tags],, [ + AG_GST_CHECK_LIBHEADER(ENCA, + enca, enca_analyser_alloc,, enca.h, [ + HAVE_ENCA="yes" + ENCA_CFLAGS="" + ENCA_LIBS="-lenca" + AC_SUBST(ENCA_LIBS) + ]) +]) + dnl *** libgio *** translit(dnm, m, l) AM_CONDITIONAL(USE_GIO, true) AG_GST_CHECK_FEATURE(GIO, [GIO library], gio, [ @@ -825,6 +837,7 @@ AM_CONDITIONAL(USE_OGG, false) AM_CONDITIONAL(USE_PANGO, false) AM_CONDITIONAL(USE_THEORA, false) AM_CONDITIONAL(USE_VORBIS, false) +AM_CONDITIONAL(USE_ENCA, false) fi dnl of EXT plugins diff --git a/gst-libs/gst/tag/Makefile.am b/gst-libs/gst/tag/Makefile.am index 9e82463..1f77b87 100644 --- a/gst-libs/gst/tag/Makefile.am +++ b/gst-libs/gst/tag/Makefile.am @@ -9,8 +9,8 @@ lib_LTLIBRARIES = libgsttag-@GST_MAJORMINOR@.la libgsttag_@GST_MAJORMINOR@_la_SOURCES = \ gstvorbistag.c gstid3tag.c gstxmptag.c gstexiftag.c \ lang.c tags.c gsttagdemux.c gsttageditingprivate.c -libgsttag_@GST_MAJORMINOR@_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS) -libgsttag_@GST_MAJORMINOR@_la_LIBADD = $(GST_BASE_LIBS) $(GST_LIBS) $(LIBM) +libgsttag_@GST_MAJORMINOR@_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS) $(ENCA_CFLAGS) +libgsttag_@GST_MAJORMINOR@_la_LIBADD = $(GST_BASE_LIBS) $(GST_LIBS) $(LIBM) $(ENCA_LIBS) 
libgsttag_@GST_MAJORMINOR@_la_LDFLAGS = $(GST_LIB_LDFLAGS) $(GST_ALL_LDFLAGS) $(GST_LT_LDFLAGS) # lang-tables.dat contains generated static data and is included by lang.c diff --git a/gst-libs/gst/tag/tags.c b/gst-libs/gst/tag/tags.c index 9521821..46e2e3a 100644 --- a/gst-libs/gst/tag/tags.c +++ b/gst-libs/gst/tag/tags.c @@ -29,6 +29,10 @@ #include <string.h> +#ifdef HAVE_ENCA +#include <enca.h> +#endif + /** * SECTION:gsttag * @short_description: additional tag definitions for plugins and applications @@ -366,10 +370,13 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size, const gchar ** env_vars) { const gchar *cur_loc = NULL; - gsize bytes_read; - gchar *utf8 = NULL; +#ifdef HAVE_ENCA + EncaAnalyser eanalyser; + gchar langname[3]; + const gchar * env; +#endif g_return_val_if_fail (data != NULL, NULL); @@ -389,6 +396,35 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size, goto beach; } + while (env_vars && *env_vars != NULL) { + const gchar *env = NULL; + + /* Try charsets specified via the environment */ + env = g_getenv (*env_vars); + if (env != NULL && *env != '\0') { + gchar **c, **csets; + + csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1); + + for (c = csets; c && *c; ++c) { + GST_LOG ("Trying to convert freeform string to UTF-8 from '%s'", *c); + if ((utf8 = + g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL))) { + if (bytes_read == size) { + g_strfreev (csets); + goto beach; + } + g_free (utf8); + utf8 = NULL; + } + } + + g_strfreev (csets); + } + ++env_vars; + } + +#ifndef HAVE_ENCA /* check for and use byte-order-mark for UTF-16/32 cases */ if (size >= 2) { const gchar *c = NULL; @@ -435,34 +471,6 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size, } } - while (env_vars && *env_vars != NULL) { - const gchar *env = NULL; - - /* Try charsets specified via the environment */ - env = g_getenv (*env_vars); - if (env != NULL && *env != '\0') { - gchar **c, **csets; - - csets = g_strsplit (env, 
G_SEARCHPATH_SEPARATOR_S, -1); - - for (c = csets; c && *c; ++c) { - GST_LOG ("Trying to convert freeform string to UTF-8 from '%s'", *c); - if ((utf8 = - g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL))) { - if (bytes_read == size) { - g_strfreev (csets); - goto beach; - } - g_free (utf8); - utf8 = NULL; - } - } - - g_strfreev (csets); - } - ++env_vars; - } - /* Try current locale (if not UTF-8) */ if (!g_get_charset (&cur_loc)) { GST_LOG ("Trying to convert freeform string using locale ('%s')", cur_loc); @@ -500,9 +508,39 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size, g_free (utf8); return NULL; +#else /* HAVE_ENCA */ + env = g_getenv("LANG"); + if(NULL == env) { + return NULL; + } + langname[0] = env[0]; + langname[1] = env[1]; + langname[2] = 0; + + eanalyser = enca_analyser_alloc(langname); + if(eanalyser) { + EncaEncoding encoding; + const gchar * charset; + + encoding = enca_analyse(eanalyser, (unsigned char*)data, size); + if(ENCA_CS_UNKNOWN != encoding.charset) { + charset = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV); + + utf8 = g_convert (data, size, "UTF-8", charset, &bytes_read, NULL, NULL); + if (utf8 != NULL && bytes_read == size) { + enca_analyser_free(eanalyser); + goto beach; + } + } + + enca_analyser_free(eanalyser); + } + + g_free(utf8); + return NULL; +#endif beach: - g_strchomp (utf8); if (utf8 && utf8[0] != '\0') { GST_LOG ("Returning '%s'", utf8); -- 1.7.4.4
Not particularly keen on adding a dependency on enca or any external lib for that, really. There were good reasons not to, other than 'we don't do that for libs in -base', but I don't recall the details right now; I'm sure it's in one of the many duplicate bugs. Also see bug #615211.
For a start, I had written to the enca author and he said that no one should really use this library at all :) Another reason is that it is GPL and not LGPL.
Ok, let's just close this bug (and the two id3demux bugs) as duplicates of bug #451565 and find a better solution for this, like the encoding detection that is implemented in gedit or Mozilla. *** This bug has been marked as a duplicate of bug 451565 ***