After an evaluation, GNOME has moved from Bugzilla to GitLab. Learn more about GitLab.
No new issues can be reported in GNOME Bugzilla anymore.
To report an issue in a GNOME project, go to GNOME GitLab.
Do not go to GNOME GitLab for: Bluefish, Doxygen, GnuCash, GStreamer, java-gnome, LDTP, NetworkManager, Tomboy.
Bug 647140 - Add tags encoding detect via enca
Add tags encoding detect via enca
Status: RESOLVED DUPLICATE of bug 451565
Product: GStreamer
Classification: Platform
Component: gst-plugins-base
0.10.32
Other Linux
: Normal enhancement
: git master
Assigned To: GStreamer Maintainers
GStreamer Maintainers
Depends on:
Blocks:
 
 
Reported: 2011-04-08 08:05 UTC by Heiher
Modified: 2011-05-18 20:20 UTC
See Also:
GNOME target: ---
GNOME version: ---



Description Heiher 2011-04-08 08:05:09 UTC
From 38b9889266767969ed5d91c455c2b8611bf67a44 Mon Sep 17 00:00:00 2001
From: Heiher <admin@heiher.info>
Date: Fri, 8 Apr 2011 15:48:36 +0800
Subject: [PATCH] Add tags encoding detect via enca

---
 configure.ac                 |   13 +++++
 gst-libs/gst/tag/Makefile.am |    4 +-
 gst-libs/gst/tag/tags.c      |  100 +++++++++++++++++++++++++++++-------------
 3 files changed, 84 insertions(+), 33 deletions(-)

diff --git a/configure.ac b/configure.ac
index 3325abc..d64030c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -716,6 +716,18 @@ AG_GST_CHECK_FEATURE(IVORBIS, [integer vorbis plug-in], ivorbisdec, [
   fi
 ])
 
+dnl *** tags prefer to have enca ***
+translit(dnm, m, l) AM_CONDITIONAL(USE_ENCA, true)
+AG_GST_CHECK_FEATURE(ENCA, [enca support for tags],, [
+  AG_GST_CHECK_LIBHEADER(ENCA,
+    enca, enca_analyser_alloc,, enca.h, [
+    HAVE_ENCA="yes"
+	ENCA_CFLAGS=""
+    ENCA_LIBS="-lenca"
+    AC_SUBST(ENCA_LIBS)
+  ])
+])
+
 dnl *** libgio ***
 translit(dnm, m, l) AM_CONDITIONAL(USE_GIO, true)
 AG_GST_CHECK_FEATURE(GIO, [GIO library], gio, [
@@ -825,6 +837,7 @@ AM_CONDITIONAL(USE_OGG, false)
 AM_CONDITIONAL(USE_PANGO, false)
 AM_CONDITIONAL(USE_THEORA, false)
 AM_CONDITIONAL(USE_VORBIS, false)
+AM_CONDITIONAL(USE_ENCA, false)
 
 fi dnl of EXT plugins
 
diff --git a/gst-libs/gst/tag/Makefile.am b/gst-libs/gst/tag/Makefile.am
index 9e82463..1f77b87 100644
--- a/gst-libs/gst/tag/Makefile.am
+++ b/gst-libs/gst/tag/Makefile.am
@@ -9,8 +9,8 @@ lib_LTLIBRARIES = libgsttag-@GST_MAJORMINOR@.la
 libgsttag_@GST_MAJORMINOR@_la_SOURCES = \
     gstvorbistag.c gstid3tag.c gstxmptag.c gstexiftag.c \
     lang.c tags.c gsttagdemux.c gsttageditingprivate.c
-libgsttag_@GST_MAJORMINOR@_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS)
-libgsttag_@GST_MAJORMINOR@_la_LIBADD = $(GST_BASE_LIBS) $(GST_LIBS) $(LIBM)
+libgsttag_@GST_MAJORMINOR@_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS) $(ENCA_CFLAGS)
+libgsttag_@GST_MAJORMINOR@_la_LIBADD = $(GST_BASE_LIBS) $(GST_LIBS) $(LIBM) $(ENCA_LIBS)
 libgsttag_@GST_MAJORMINOR@_la_LDFLAGS = $(GST_LIB_LDFLAGS) $(GST_ALL_LDFLAGS) $(GST_LT_LDFLAGS)
 
 # lang-tables.dat contains generated static data and is included by lang.c
diff --git a/gst-libs/gst/tag/tags.c b/gst-libs/gst/tag/tags.c
index 9521821..46e2e3a 100644
--- a/gst-libs/gst/tag/tags.c
+++ b/gst-libs/gst/tag/tags.c
@@ -29,6 +29,10 @@
 
 #include <string.h>
 
+#ifdef HAVE_ENCA
+#include <enca.h>
+#endif
+
 /**
  * SECTION:gsttag
  * @short_description: additional tag definitions for plugins and applications
@@ -366,10 +370,13 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
     const gchar ** env_vars)
 {
   const gchar *cur_loc = NULL;
-
   gsize bytes_read;
-
   gchar *utf8 = NULL;
+#ifdef HAVE_ENCA
+  EncaAnalyser eanalyser;
+  gchar langname[3];
+  const gchar * env;
+#endif
 
   g_return_val_if_fail (data != NULL, NULL);
 
@@ -389,6 +396,35 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
     goto beach;
   }
 
+  while (env_vars && *env_vars != NULL) {
+    const gchar *env = NULL;
+
+    /* Try charsets specified via the environment */
+    env = g_getenv (*env_vars);
+    if (env != NULL && *env != '\0') {
+      gchar **c, **csets;
+
+      csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
+
+      for (c = csets; c && *c; ++c) {
+        GST_LOG ("Trying to convert freeform string to UTF-8 from '%s'", *c);
+        if ((utf8 =
+                g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
+          if (bytes_read == size) {
+            g_strfreev (csets);
+            goto beach;
+          }
+          g_free (utf8);
+          utf8 = NULL;
+        }
+      }
+
+      g_strfreev (csets);
+    }
+    ++env_vars;
+  }
+
+#ifndef HAVE_ENCA
   /* check for and use byte-order-mark for UTF-16/32 cases */
   if (size >= 2) {
     const gchar *c = NULL;
@@ -435,34 +471,6 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
     }
   }
 
-  while (env_vars && *env_vars != NULL) {
-    const gchar *env = NULL;
-
-    /* Try charsets specified via the environment */
-    env = g_getenv (*env_vars);
-    if (env != NULL && *env != '\0') {
-      gchar **c, **csets;
-
-      csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
-
-      for (c = csets; c && *c; ++c) {
-        GST_LOG ("Trying to convert freeform string to UTF-8 from '%s'", *c);
-        if ((utf8 =
-                g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
-          if (bytes_read == size) {
-            g_strfreev (csets);
-            goto beach;
-          }
-          g_free (utf8);
-          utf8 = NULL;
-        }
-      }
-
-      g_strfreev (csets);
-    }
-    ++env_vars;
-  }
-
   /* Try current locale (if not UTF-8) */
   if (!g_get_charset (&cur_loc)) {
     GST_LOG ("Trying to convert freeform string using locale ('%s')", cur_loc);
@@ -500,9 +508,39 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
 
   g_free (utf8);
   return NULL;
+#else /* HAVE_ENCA */
+  env = g_getenv("LANG");
+  if(NULL == env) {
+	  return NULL;
+  }
+  langname[0] = env[0];
+  langname[1] = env[1];
+  langname[2] = 0;
+
+  eanalyser = enca_analyser_alloc(langname);
+  if(eanalyser) {
+	  EncaEncoding encoding;
+	  const gchar * charset;
+
+	  encoding = enca_analyse(eanalyser, (unsigned char*)data, size);
+	  if(ENCA_CS_UNKNOWN != encoding.charset) {
+		  charset = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
+
+		  utf8 = g_convert (data, size, "UTF-8", charset, &bytes_read, NULL, NULL);
+		  if (utf8 != NULL && bytes_read == size) {
+			  enca_analyser_free(eanalyser);
+			  goto beach;
+		  }
+	  }
+
+	  enca_analyser_free(eanalyser);
+  }
+
+  g_free(utf8);
+  return NULL;
+#endif
 
 beach:
-
   g_strchomp (utf8);
   if (utf8 && utf8[0] != '\0') {
     GST_LOG ("Returning '%s'", utf8);
-- 
1.7.4.4
Comment 1 Tim-Philipp Müller 2011-04-24 11:22:03 UTC
Not particularly keen on adding a dependency on enca or any external lib for that really. There were good reasons not to, other than 'we don't do that for libs in -base', but I don't recall the details right now; I'm sure they're in one of the many duplicate bugs. Also see bug #615211.
Comment 2 Stefan Sauer (gstreamer, gtkdoc dev) 2011-04-26 21:08:27 UTC
For a start, I had written to the enca author and he said that no one should really use this library at all :) Another reason is that it is GPL and not LGPL.
Comment 3 Sebastian Dröge (slomo) 2011-05-18 20:20:43 UTC
Ok, let's just close this bug (and the two id3demux bugs) as duplicates of bug #451565 and find a better solution for this, like the encoding detection that is implemented in gedit or Mozilla.

*** This bug has been marked as a duplicate of bug 451565 ***