GNOME Bugzilla – Bug 347585
segv in gtk_text_iter_ends_line with gcc 4.1.1 -finline-functions (strict-aliasing)
Last modified: 2011-02-04 16:10:42 UTC
$ python -c 'import gtk; b = gtk.TextBuffer(); b.set_text("Foo\n"); i = b.get_iter_at_offset(0); i.forward_to_line_end()' Segmentation fault (gdb) r -c 'import gtk; b = gtk.TextBuffer(); b.set_text("Foo\n"); i = b.get_iter_at_offset(0); i.forward_to_line_end()' Starting program: /usr/bin/python -c 'import gtk; b = gtk.TextBuffer(); b.set_text("Foo\n"); i = b.get_iter_at_offset(0); i.forward_to_line_end()' [Thread debugging using libthread_db enabled] [New Thread -1210919248 (LWP 21855)] Program received signal SIGSEGV, Segmentation fault.
+ Trace 69348
Thread -1210919248 (LWP 21855)
Dump of assembler code for function _gtk_text_btree_get_chars_changed_stamp: 0xb77f7f00 <_gtk_text_btree_get_chars_changed_stamp+0>: push %ebp 0xb77f7f01 <_gtk_text_btree_get_chars_changed_stamp+1>: mov %esp,%ebp 0xb77f7f03 <_gtk_text_btree_get_chars_changed_stamp+3>: mov 0x8(%ebp),%eax 0xb77f7f06 <_gtk_text_btree_get_chars_changed_stamp+6>: mov 0x28(%eax),%eax 0xb77f7f09 <_gtk_text_btree_get_chars_changed_stamp+9>: leave 0xb77f7f0a <_gtk_text_btree_get_chars_changed_stamp+10>: ret End of assembler dump. (gdb) disass IA__gtk_text_iter_ends_line Dump of assembler code for function IA__gtk_text_iter_ends_line: 0xb7816fa0 <IA__gtk_text_iter_ends_line+0>: push %ebp 0xb7816fa1 <IA__gtk_text_iter_ends_line+1>: mov %esp,%ebp 0xb7816fa3 <IA__gtk_text_iter_ends_line+3>: sub $0x68,%esp 0xb7816fa6 <IA__gtk_text_iter_ends_line+6>: mov %esi,0xfffffff8(%ebp) 0xb7816fa9 <IA__gtk_text_iter_ends_line+9>: mov 0x8(%ebp),%esi 0xb7816fac <IA__gtk_text_iter_ends_line+12>: mov %ebx,0xfffffff4(%ebp) 0xb7816faf <IA__gtk_text_iter_ends_line+15>: mov %edi,0xfffffffc(%ebp) 0xb7816fb2 <IA__gtk_text_iter_ends_line+18>: call 0xb762e6e7 <__i686.get_pc_thunk.bx> 0xb7816fb7 <IA__gtk_text_iter_ends_line+23>: add $0x194fe9,%ebx 0xb7816fbd <IA__gtk_text_iter_ends_line+29>: test %esi,%esi 0xb7816fbf <IA__gtk_text_iter_ends_line+31>: je 0xb78170a3 <IA__gtk_text_iter_ends_line+259> 0xb7816fc5 <IA__gtk_text_iter_ends_line+37>: mov %esi,(%esp) 0xb7816fc8 <IA__gtk_text_iter_ends_line+40>: call 0xb7815c90 <IA__gtk_text_iter_get_char> 0xb7816fcd <IA__gtk_text_iter_ends_line+45>: cmp $0xd,%eax 0xb7816fd0 <IA__gtk_text_iter_ends_line+48>: mov %eax,%ecx 0xb7816fd2 <IA__gtk_text_iter_ends_line+50>: sete %dl 0xb7816fd5 <IA__gtk_text_iter_ends_line+53>: cmp $0x2029,%eax 0xb7816fda <IA__gtk_text_iter_ends_line+58>: sete %al 0xb7816fdd <IA__gtk_text_iter_ends_line+61>: or %al,%dl 0xb7816fdf <IA__gtk_text_iter_ends_line+63>: jne 0xb7817030 <IA__gtk_text_iter_ends_line+144> 0xb7816fe1 <IA__gtk_text_iter_ends_line+65>: 
test %ecx,%ecx 0xb7816fe3 <IA__gtk_text_iter_ends_line+67>: je 0xb7817030 <IA__gtk_text_iter_ends_line+144> 0xb7816fe5 <IA__gtk_text_iter_ends_line+69>: xor %eax,%eax 0xb7816fe7 <IA__gtk_text_iter_ends_line+71>: cmp $0xa,%ecx 0xb7816fea <IA__gtk_text_iter_ends_line+74>: jne 0xb7817035 <IA__gtk_text_iter_ends_line+149> 0xb7816fec <IA__gtk_text_iter_ends_line+76>: cld 0xb7816fed <IA__gtk_text_iter_ends_line+77>: lea 0xffffffbc(%ebp),%edi 0xb7816ff0 <IA__gtk_text_iter_ends_line+80>: mov $0xe,%cl 0xb7816ff2 <IA__gtk_text_iter_ends_line+82>: mov 0xffffffbc(%ebp),%eax 0xb7816ff5 <IA__gtk_text_iter_ends_line+85>: rep movsl %ds:(%esi),%es:(%edi) 0xb7816ff7 <IA__gtk_text_iter_ends_line+87>: lea 0xffffffbc(%ebp),%edi 0xb7816ffa <IA__gtk_text_iter_ends_line+90>: mov %eax,(%esp) 0xb7816ffd <IA__gtk_text_iter_ends_line+93>: mov 0xffffffd4(%ebp),%esi 0xb7817000 <IA__gtk_text_iter_ends_line+96>: call 0xb77f7f00 <_gtk_text_btree_get_chars_changed_stamp> 0xb7817005 <IA__gtk_text_iter_ends_line+101>: cmp %eax,%esi 0xb7817007 <IA__gtk_text_iter_ends_line+103>: je 0xb7817040 <IA__gtk_text_iter_ends_line+160> 0xb7817009 <IA__gtk_text_iter_ends_line+105>: lea 0xfffdd7e8(%ebx),%eax 0xb781700f <IA__gtk_text_iter_ends_line+111>: movl $0x10,0x4(%esp) 0xb7817017 <IA__gtk_text_iter_ends_line+119>: mov %eax,0x8(%esp) 0xb781701b <IA__gtk_text_iter_ends_line+123>: lea 0xfff45076(%ebx),%eax 0xb7817021 <IA__gtk_text_iter_ends_line+129>: mov %eax,(%esp) 0xb7817024 <IA__gtk_text_iter_ends_line+132>: call 0xb762d5b8 <g_log@plt> 0xb7817029 <IA__gtk_text_iter_ends_line+137>: mov $0x1,%eax 0xb781702e <IA__gtk_text_iter_ends_line+142>: jmp 0xb7817035 <IA__gtk_text_iter_ends_line+149> 0xb7817030 <IA__gtk_text_iter_ends_line+144>: mov $0x1,%eax 0xb7817035 <IA__gtk_text_iter_ends_line+149>: mov 0xfffffff4(%ebp),%ebx 0xb7817038 <IA__gtk_text_iter_ends_line+152>: mov 0xfffffff8(%ebp),%esi 0xb781703b <IA__gtk_text_iter_ends_line+155>: mov 0xfffffffc(%ebp),%edi 0xb781703e <IA__gtk_text_iter_ends_line+158>: 
leave 0xb781703f <IA__gtk_text_iter_ends_line+159>: ret 0xb7817040 <IA__gtk_text_iter_ends_line+160>: mov 0xffffffd8(%ebp),%esi 0xb7817043 <IA__gtk_text_iter_ends_line+163>: mov 0xffffffbc(%ebp),%eax 0xb7817046 <IA__gtk_text_iter_ends_line+166>: mov %eax,(%esp) 0xb7817049 <IA__gtk_text_iter_ends_line+169>: call 0xb77f7f10 <_gtk_text_btree_get_segments_changed_stamp> 0xb781704e <IA__gtk_text_iter_ends_line+174>: cmp %eax,%esi 0xb7817050 <IA__gtk_text_iter_ends_line+176>: je 0xb781706e <IA__gtk_text_iter_ends_line+206> 0xb7817052 <IA__gtk_text_iter_ends_line+178>: movl $0x0,0xffffffdc(%ebp) 0xb7817059 <IA__gtk_text_iter_ends_line+185>: movl $0x0,0xffffffe0(%ebp) 0xb7817060 <IA__gtk_text_iter_ends_line+192>: movl $0xffffd8f0,0xffffffe4(%ebp) 0xb7817067 <IA__gtk_text_iter_ends_line+199>: movl $0xffffd8f0,0xffffffe8(%ebp) 0xb781706e <IA__gtk_text_iter_ends_line+206>: mov 0xc(%edi),%ecx 0xb7817071 <IA__gtk_text_iter_ends_line+209>: test %ecx,%ecx 0xb7817073 <IA__gtk_text_iter_ends_line+211>: js 0xb78170cc <IA__gtk_text_iter_ends_line+300> 0xb7817075 <IA__gtk_text_iter_ends_line+213>: mov 0xc(%edi),%esi 0xb7817078 <IA__gtk_text_iter_ends_line+216>: test %esi,%esi 0xb781707a <IA__gtk_text_iter_ends_line+218>: je 0xb7817030 <IA__gtk_text_iter_ends_line+144> 0xb781707c <IA__gtk_text_iter_ends_line+220>: movl $0x1,0x4(%esp) 0xb7817084 <IA__gtk_text_iter_ends_line+228>: mov %edi,(%esp) 0xb7817087 <IA__gtk_text_iter_ends_line+231>: call 0xb78185f0 <IA__gtk_text_iter_backward_chars> 0xb781708c <IA__gtk_text_iter_ends_line+236>: test %eax,%eax 0xb781708e <IA__gtk_text_iter_ends_line+238>: je 0xb7817030 <IA__gtk_text_iter_ends_line+144> 0xb7817090 <IA__gtk_text_iter_ends_line+240>: mov %edi,(%esp) 0xb7817093 <IA__gtk_text_iter_ends_line+243>: call 0xb7815c90 <IA__gtk_text_iter_get_char> 0xb7817098 <IA__gtk_text_iter_ends_line+248>: cmp $0xd,%eax 0xb781709b <IA__gtk_text_iter_ends_line+251>: setne %al 0xb781709e <IA__gtk_text_iter_ends_line+254>: movzbl %al,%eax 0xb78170a1 
<IA__gtk_text_iter_ends_line+257>: jmp 0xb7817035 <IA__gtk_text_iter_ends_line+149> 0xb78170a3 <IA__gtk_text_iter_ends_line+259>: lea 0xfffe97ae(%ebx),%eax 0xb78170a9 <IA__gtk_text_iter_ends_line+265>: mov %eax,0x8(%esp) 0xb78170ad <IA__gtk_text_iter_ends_line+269>: lea 0xfffde83d(%ebx),%eax 0xb78170b3 <IA__gtk_text_iter_ends_line+275>: mov %eax,0x4(%esp) 0xb78170b7 <IA__gtk_text_iter_ends_line+279>: lea 0xfff45076(%ebx),%eax 0xb78170bd <IA__gtk_text_iter_ends_line+285>: mov %eax,(%esp) 0xb78170c0 <IA__gtk_text_iter_ends_line+288>: call 0xb762e538 <g_return_if_fail_warning@plt> 0xb78170c5 <IA__gtk_text_iter_ends_line+293>: xor %eax,%eax 0xb78170c7 <IA__gtk_text_iter_ends_line+295>: jmp 0xb7817035 <IA__gtk_text_iter_ends_line+149> 0xb78170cc <IA__gtk_text_iter_ends_line+300>: mov 0x8(%edi),%edx 0xb78170cf <IA__gtk_text_iter_ends_line+303>: test %edx,%edx 0xb78170d1 <IA__gtk_text_iter_ends_line+305>: js 0xb78170f2 <IA__gtk_text_iter_ends_line+338> 0xb78170d3 <IA__gtk_text_iter_ends_line+307>: lea 0x2c(%edi),%eax 0xb78170d6 <IA__gtk_text_iter_ends_line+310>: mov %edx,0x4(%esp) 0xb78170da <IA__gtk_text_iter_ends_line+314>: mov %eax,0xc(%esp) 0xb78170de <IA__gtk_text_iter_ends_line+318>: lea 0xc(%edi),%eax 0xb78170e1 <IA__gtk_text_iter_ends_line+321>: mov %eax,0x8(%esp) 0xb78170e5 <IA__gtk_text_iter_ends_line+325>: mov 0x4(%edi),%eax 0xb78170e8 <IA__gtk_text_iter_ends_line+328>: mov %eax,(%esp) 0xb78170eb <IA__gtk_text_iter_ends_line+331>: call 0xb77f98e0 <_gtk_text_line_byte_to_char_offsets> 0xb78170f0 <IA__gtk_text_iter_ends_line+336>: jmp 0xb7817075 <IA__gtk_text_iter_ends_line+213> 0xb78170f2 <IA__gtk_text_iter_ends_line+338>: lea 0xfffddcde(%ebx),%eax 0xb78170f8 <IA__gtk_text_iter_ends_line+344>: movl $0x169,0x8(%esp) 0xb7817100 <IA__gtk_text_iter_ends_line+352>: mov %eax,0x10(%esp) 0xb7817104 <IA__gtk_text_iter_ends_line+356>: lea 0xfffde234(%ebx),%eax 0xb781710a <IA__gtk_text_iter_ends_line+362>: mov %eax,0xc(%esp) 0xb781710e <IA__gtk_text_iter_ends_line+366>: 
lea 0xfffddc90(%ebx),%eax 0xb7817114 <IA__gtk_text_iter_ends_line+372>: mov %eax,0x4(%esp) 0xb7817118 <IA__gtk_text_iter_ends_line+376>: lea 0xfff45076(%ebx),%eax 0xb781711e <IA__gtk_text_iter_ends_line+382>: mov %eax,(%esp) 0xb7817121 <IA__gtk_text_iter_ends_line+385>: call 0xb762c1a8 <g_assert_warning@plt> End of assembler dump. (gdb) info reg eax 0x3 3 ecx 0x0 0 edx 0x0 0 ebx 0xb79abfa0 -1214595168 esp 0xbf8ce8e8 0xbf8ce8e8 ebp 0xbf8ce8e8 0xbf8ce8e8 esi 0x7d8d315f 2106405215 edi 0xbf8ce914 -1081284332 eip 0xb77f7f06 0xb77f7f06 <_gtk_text_btree_get_chars_changed_stamp+6> eflags 0x10246 [ PF ZF IF RF ] cs 0x73 115 ss 0x7b 123 ds 0x7b 123 es 0x7b 123 fs 0x0 0 gs 0x33 51 Note that the broken argument to _gtk_text_btree_get_chars_changed_stamp is loaded into esp from eax, which is set to the 0-offset member of ebp-0x44 (at +82: mov 0xffffffbc(%ebp), %eax): 0xb7816fec <IA__gtk_text_iter_ends_line+76>: cld 0xb7816fed <IA__gtk_text_iter_ends_line+77>: lea 0xffffffbc(%ebp),%edi 0xb7816ff0 <IA__gtk_text_iter_ends_line+80>: mov $0xe,%cl 0xb7816ff2 <IA__gtk_text_iter_ends_line+82>: mov 0xffffffbc(%ebp),%eax 0xb7816ff5 <IA__gtk_text_iter_ends_line+85>: rep movsl %ds:(%esi),%es:(%edi) 0xb7816ff7 <IA__gtk_text_iter_ends_line+87>: lea 0xffffffbc(%ebp),%edi 0xb7816ffa <IA__gtk_text_iter_ends_line+90>: mov %eax,(%esp) 0xb7816ffd <IA__gtk_text_iter_ends_line+93>: mov 0xffffffd4(%ebp),%esi 0xb7817000 <IA__gtk_text_iter_ends_line+96>: call 0xb77f7f00 <_gtk_text_btree_get_chars_changed_stamp> However eax is set *before* the iter (in gtk_text_iter_ends_line locals) is copied to ebp-0x44 (at +85), so contains junk instead of the 0-offset member of the tmp iter. So, this is an optimising (code order) bug; it looks like a strict-aliasing bug to me. gtk+ was compiled with gcc 4.1.1 Gentoo, -O3 -Wall. No strict-aliasing warnings were emitted to stderr.
Hope I got the component right; GtkTextIter is owned by GtkTextView, right? More info: this is 100% reproducible opening any document in gedit (with appropriate plugins installed; not sure which). However the above python script made for easier testing. The crash is seemingly very sensitive to compilation conditions (CFLAGS, gcc as above; x86; -mcpu=athlon-xp); it does not exhibit on ppc.
Created attachment 68959 [details] Testcase for program structure Test program; copies gtktextiter.c code structure
$ gcc punn.c -march=i686 -O2 -finline-functions -Wall $ ./a.out 0x804a008 (nil) 100 100 Note: * -finline-functions is allowing functions to be interleaved * Type punning is confusing gcc, causing it to access members of the struct before the type-punned version has been fully initialised * The padding members are necessary, to fill up the scheduler, I guess * Note that here I use -march=i686; -march=athlon-xp actually prevents the bug
Just to help: the implied call stack (without inlining) is:
+ Trace 69349
Hm. How to fix this? Any C gurus around? I guess a union of GtkTextIter and GtkTextIterReal is not on the cards, as we have to be portable. Should we perhaps convert to GtkTextIterReal at entrypoints and only pass around GtkTextIterReal to internals? But would that break with calling external API internally?
Hmm, even after several years, I still don't really understand the C aliasing rules. But: Note that this problem can only occur in GTK+, and unless there is inter-file optimization going on, only in gtktextiter.c. (I don't think anybody compiles GTK+ with inter-file optimization, but if we were being paranoid, we might want to allow for the possibility.) The problem depends on the caller knowing that the callee accesses GtkTextIter as GtkRealTextIter. Outside of GTK+, when there is just an opaque function that takes a GtkTextIter, it would be invalid for the compiler to move part of the copy after the function call. So, there is no ABI or API problem here. And the *only* way that we ever access GtkRealTextIter as GtkTextIter is when doing this sort of initialization assignment, so we can just add a private macro: #define GTK_TEXT_ITER_ASSIGN(DEST,SRC) \ *(GtkTextIterReal *)(DEST) = *(GtkTextIterReal *)(SRC); And then replace: GtkTextIter tmp = *iter; With GtkTextIter tmp; GTK_TEXT_ITER_ASSIGN(&tmp, iter); (Or maybe have a macro that does the declare and assign...) and that should fix the problem.
OK, gotcha. That works, but it triggers a warning: warning: dereferencing type-punned pointer will break strict-aliasing rules We could avoid that by casting through char *: #define GTK_TEXT_ITER_ASSIGN(DEST,SRC) \ *(GtkTextIterReal *)(char *)(DEST) = *(GtkTextIterReal *)(SRC); But in that case we may as well use memcpy: GtkTextIter tmp; memcpy (&tmp, iter, sizeof (GtkTextIterReal)); Plus it's more concise, and ends up with identical code (gcc optimises fixed-size memcpy to struct assignment, and vice-versa). I guess we should worry about inter-file optimisation, though; gcc doesn't do it yet, but it will eventually, and VC++ does already. So, we need to find all places in GTK+ where a GtkTextIter is used as an lvalue.
Or just use G_GNUC_MAY_ALIAS proposed in bug 335853.
Created attachment 68981 [details] [review] gtk-text-iter-alias-segv.patch Ah. Thanks! This seems to fix it here; it adds the may_alias attribute to GtkTextRealIter, so only gtktextiter.c is affected. I guess this makes bug 335853 a little more urgent; this patch defines G_GNUC_MAY_ALIAS locally but obviously it needs to go into glib asap.
Hmm, unfortunately, this gives an ICE with the gcc 4.1.1 I'm using here, which is gcc version 4.1.1 20060711 (Red Hat 4.1.1-7)
Ed, do you have a patch for the memcpy() variant of the fix ?
No, sorry. I trashed the patch when Behdad pointed out G_GNUC_MAY_ALIAS; it seems a far better solution. I remember how I generated it, though; typedef struct _GtkTextIter const GtkTextIter and let gcc find all the lvalues.
(In reply to comment #10) > Hmm, unfortunately, this gives an ICE with the gcc 4.1.1 I'm using here, > which is > > gcc version 4.1.1 20060711 (Red Hat 4.1.1-7) I read this again and again without understanding it. What does "ICE" mean here?!
ICE = internal compiler error
Matthias, does this still break Red Hat's gcc snapshots?
Yes, it still does with gcc version 4.1.1 20061220 (Red Hat 4.1.1-48) gcc -DHAVE_CONFIG_H -I. -I. -I.. -DG_LOG_DOMAIN=\"Gtk\" -DGTK_LIBDIR=\"/home/mclasen/gnome-2.18/install//lib\" -DGTK_DATADIR=\"/home/mclasen/gnome-2.18/install//share\" -DGTK_DATA_PREFIX=\"/home/mclasen/gnome-2.18/install/\" -DGTK_SYSCONFDIR=\"/home/mclasen/gnome-2.18/install//etc\" -DGTK_VERSION=\"2.10.7\" -DGTK_BINARY_VERSION=\"2.10.0\" -DGTK_HOST=\"i686-redhat-linux-gnu\" -DGTK_COMPILATION -DGTK_PRINT_BACKENDS=\"file,cups\" "-DGTK_PRINT_PREVIEW_COMMAND=\"evince --preview %f\"" -I../gtk -I.. -I../gdk -I../gdk -I../gdk-pixbuf -I../gdk-pixbuf -DGDK_PIXBUF_DISABLE_DEPRECATED -DGDK_DISABLE_DEPRECATED -DGTK_FILE_SYSTEM_ENABLE_UNSUPPORTED -DGTK_PRINT_BACKEND_ENABLE_UNSUPPORTED -DG_ENABLE_DEBUG -pthread -I/home/mclasen/gnome-2.18/install/include/glib-2.0 -I/home/mclasen/gnome-2.18/install/lib/glib-2.0/include -I/home/mclasen/gnome-2.18/install//include/pango-1.0 -I/home/mclasen/gnome-2.18/install/include/cairo -I/home/mclasen/gnome-2.18/install/include/atk-1.0 -g -O2 -g -Wall -MT gtktextiter.lo -MD -MP -MF .deps/gtktextiter.Tpo -c gtktextiter.c -fPIC -DPIC -o .libs/gtktextiter.o gtktextiter.c:81: internal compiler error: in splice_child_die, at dwarf2out.c:5503 Please submit a full bug report, with preprocessed source if appropriate. See <URL:http://bugzilla.redhat.com/bugzilla> for instructions. Preprocessed source stored into /tmp/ccRnSuJP.out file, please attach this to your bugreport.
Possibly http://gcc.gnu.org/bugzilla/show_bug.cgi?id=28834 ?
Quite possibly, though this is with -O2 -g on i686, no -O3 required
*** Bug 346484 has been marked as a duplicate of this bug. ***
Created attachment 97514 [details] [review] gtk-text-iter-alias-segv.patch Here's a simplified patch now that we have G_GNUC_MAY_ALIAS. Since the gcc ICE doesn't look to be fixed any time soon, it might be worth thinking about other approaches.
Created attachment 97518 [details] [review] gtk-text-iter-alias-segv.2.patch Alternative approach, not using G_GNUC_MAY_ALIAS. Ugly, but it should work, right?
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=28834#c31 The crash is fixed by ignoring the attribute on the typedef. If you want to apply may_alias to a struct type, you need to specify it in the type definition, either as struct __attribute ((may_alias)) name { ... }; or struct name { ... } __attribute ((may_alias)); -- jason at gcc dot gnu dot org changed: What |Removed |Added ---------------------------------------------------------------------------- Status|ASSIGNED |RESOLVED Resolution| |FIXED So, it looks like adding G_GNUC_MAY_ALIAS to the typedef is the problem, and it should be added directly to the struct instead.
Created attachment 99369 [details] [review] gtk-text-iter-alias-segv.3.patch Add G_GNUC_MAY_ALIAS to the struct itself.
This works fine with gcc version 4.1.2 20070925 (Red Hat 4.1.2-33) 2007-11-22 Matthias Clasen <mclasen@redhat.com> * gtk/gtktextiter.c: Fix an aliasing problem. (#347585, Ed Catmur)