GNOME Bugzilla – Bug 628009
[volume] Float processing with orc broken
Last modified: 2010-08-27 01:01:35 UTC
Hi, the float processing of volume with orc is currently broken because of a bug in orc. We should either disable it again or let the next release depend on orc 0.4.8 (assuming that it's fixed there).
Output of the test in tests/check/orc of the float processing function is this btw (with latest GIT as of now): [...] orc_scalarmultiply_f32_ns: dest array 0 bad 0 0: f24b1d2f -> f2cb1d2f 00000000 * 1 0: aa6a9c6a -> aaea9c6a 00000000 * 2 0: b2039ebf -> b2839ebf 00000000 * 3 0: 7c7333fc -> 7cf333fc 00000000 * 4 0: 3f227a35 -> 3fa27a35 00000000 * 5 0: b7b70306 -> b8370306 00000000 * 6 0: 9ffde6a8 -> a07de6a8 00000000 * 7 0: ef93a0b6 -> f013a0b6 00000000 * 8 0: d556b0bd -> d5d6b0bd 00000000 * 9 0: 758ff686 -> 760ff686 00000000 * 10 0: 61ddc622 -> 625dc622 00000000 * 11 0: d7e1cfba -> d861cfba 00000000 * 12 0: bdabaf13 -> be2baf13 00000000 * 13 0: 5de752e4 -> 5e6752e4 00000000 * 14 0: e0c70ed8 -> e1470ed8 00000000 * 15 0: 8cb07f61 -> 8d307f61 00000000 * 16 0: bda42443 -> be242443 00000000 * 17 0: a572b6db -> a5f2b6db 00000000 * 18 0: c39f4c33 -> c41f4c33 00000000 * 19 0: 241330c8 -> 249330c8 00000000 * 20 0: 5d837e15 -> 5e037e15 00000000 * 21 0: 43a282e7 -> 442282e7 00000000 * 22 0: 7107cf60 -> 7187cf60 00000000 * 23 0: 77dc1fc8 -> 785c1fc8 00000000 * 24 0: e24cee17 -> e2ccee17 00000000 * 25 0: f0aad543 -> f12ad543 00000000 * 26 0: 1062a74a -> 10e2a74a 00000000 * 27 0: 1ba04dfe -> 1c204dfe 00000000 * 28 0: 55c26293 -> 56426293 00000000 * 29 0: 227e8de9 -> 22fe8de9 00000000 * 30 0: 88d3a29a -> 8953a29a 00000000 * 31 0: 68b178c3 -> 693178c3 00000000 * 32 0: 7c678893 -> 7ce78893 00000000 * 33 0: 0fd14a95 -> 10514a95 00000000 * 34 0: 803d4fbd -> 80000000 00000000 * 35 0: 74221f34 -> 74a21f34 00000000 * 36 0: de7fd0e3 -> deffd0e3 00000000 * 37 0: b0146bc1 -> b0946bc1 00000000 * 38 0: 5e44fedd -> 5ec4fedd 00000000 * 39 0: 17c5812b -> 1845812b 00000000 * 40 0: c30d690e -> c38d690e 00000000 * 41 0: bb7c0ea9 -> bbfc0ea9 00000000 * 42 0: 4a4abde6 -> 4acabde6 00000000 * 43 0: e72f9d42 -> e7af9d42 00000000 * 44 0: 30d4425f -> 3154425f 00000000 * 45 0: a6fc1347 -> a77c1347 00000000 * 46 0: 2b715c81 -> 2bf15c81 00000000 * 47 0: 3cb132d6 -> 3d3132d6 00000000 * 48 0: ee560ae0 -> 
eed60ae0 00000000 * 49 0: aa461857 -> aac61857 00000000 * 50 0: a89e691b -> a91e691b 00000000 * 51 0: 2c5fbf01 -> 2cdfbf01 00000000 * 52 0: 83d6305e -> 8456305e 00000000 * 53 0: bdcd7d54 -> be4d7d54 00000000 * 54 0: 277433df -> 27f433df 00000000 * 55 0: 8f0c849e -> 8f8c849e 00000000 * 56 0: 3759e362 -> 37d9e362 00000000 * 57 0: 95c56178 -> 96456178 00000000 * 58 0: d154cab6 -> d1d4cab6 00000000 * 59 0: fb4c7e48 -> fbcc7e48 00000000 * 60 0: 0fa01138 -> 10201138 00000000 * 61 0: ab20a2be -> aba0a2be 00000000 * 62 0: 8a63fc4d -> 8ae3fc4d 00000000 * 63 0: c8706e5a -> c8f06e5a 00000000 * 64 0: d42f6aea -> d4af6aea 00000000 * .global orc_scalarmultiply_f32_ns .p2align 4 orc_scalarmultiply_f32_ns: rdtsc movl %eax, 592(%rdi) stmxcsr 596(%rdi) movl 596(%rdi), %ecx movl %ecx, 600(%rdi) orl $32832, %ecx movl %ecx, 596(%rdi) ldmxcsr 596(%rdi) # 1: loadpl movd 632(%rdi), %xmm0 pshufd $0x0000, %xmm0, %xmm0 movl $16, %eax subl 24(%rdi), %eax andl $15, %eax sarl $2, %eax cmpl %eax, 8(%rdi) jle 6f movl %eax, 12(%rdi) movl 8(%rdi), %ecx subl %eax, %ecx movl %ecx, %eax sarl $3, %ecx movl %ecx, 16(%rdi) andl $7, %eax movl %eax, 20(%rdi) jmp 7f 6: movl 8(%rdi), %eax movl %eax, 12(%rdi) movl $0, %eax movl %eax, 16(%rdi) movl %eax, 20(%rdi) 7: mov 24(%rdi), %rax mov 56(%rdi), %rdx # LOOP SHIFT 0 testl $1, 12(%rdi) je 13f # 0: loadl movd 0(%rdx), %xmm1 # 2: mulf mulps %xmm0, %xmm1 # 3: storel movd %xmm1, 0(%rax) lea 4(%rax), %rax lea 4(%rdx), %rdx 13: # LOOP SHIFT 1 testl $2, 12(%rdi) je 14f # 0: loadl movq 0(%rdx), %xmm1 # 2: mulf mulps %xmm0, %xmm1 # 3: storel movq %xmm1, 0(%rax) lea 8(%rax), %rax lea 8(%rdx), %rdx 14: 1: cmpl $0, 16(%rdi) je 3f movl 16(%rdi), %esi # LOOP SHIFT 2 nop nop nop nop nop nop nop nop nop nop nop nop nop 2: # 0: loadl movdqu 0(%rdx), %xmm1 # 2: mulf mulps %xmm0, %xmm1 # 3: storel movdqa %xmm1, 0(%rax) # 0: loadl movdqu 16(%rdx), %xmm1 # 2: mulf mulps %xmm0, %xmm1 # 3: storel movdqa %xmm1, 16(%rax) lea 32(%rax), %rax lea 32(%rdx), %rdx addl $-1, %esi jne 2b 3: 
# LOOP SHIFT 2 testl $4, 20(%rdi) je 10f # 0: loadl movdqu 0(%rdx), %xmm1 # 2: mulf mulps %xmm0, %xmm1 # 3: storel movdqu %xmm1, 0(%rax) lea 16(%rax), %rax lea 16(%rdx), %rdx 10: # LOOP SHIFT 1 testl $2, 20(%rdi) je 9f # 0: loadl movq 0(%rdx), %xmm1 # 2: mulf mulps %xmm0, %xmm1 # 3: storel movq %xmm1, 0(%rax) lea 8(%rax), %rax lea 8(%rdx), %rdx 9: # LOOP SHIFT 0 testl $1, 20(%rdi) je 8f # 0: loadl movd 0(%rdx), %xmm1 # 2: mulf mulps %xmm0, %xmm1 # 3: storel movd %xmm1, 0(%rax) lea 4(%rax), %rax lea 4(%rdx), %rdx 8: ldmxcsr 600(%rdi) rdtsc movl %eax, 596(%rdi) retq [...]
I'll re-add the conditional build stuff :/
With orc git it looks like this: orc_scalarmultiply_f32_ns: backup function : PASSED dest array 0 bad 0 0: 657a54ef -> 00000000 00000000 1 0: 3e4aecea -> 00000000 00000000 2 0: cb0e741d -> 00000000 80000000 * 3 0: 1ca2e423 -> 00000000 00000000 4 0: 896a0169 -> 00000000 80000000 * 5 0: c70e8e8a -> 00000000 80000000 * 6 0: acf53268 -> 00000000 80000000 * 7 0: c282280e -> 00000000 80000000 * 8 0: 8d11a848 -> 00000000 80000000 * 9 0: 9cb01900 -> 00000000 80000000 * 10 0: 529df357 -> 00000000 00000000 11 0: 72847687 -> 00000000 00000000 12 0: 6c750c7b -> 00000000 00000000 13 0: 69a77f2b -> 00000000 00000000 14 0: 18efdbb9 -> 00000000 00000000 15 0: 36fe214d -> 00000000 00000000 16 0: e05db0b1 -> 00000000 80000000 * 17 0: 982a73ab -> 00000000 80000000 * 18 0: 1990cb1e -> 00000000 00000000 19 0: da073bdf -> 00000000 80000000 * 20 0: 6451d658 -> 00000000 00000000 21 0: 543069df -> 00000000 00000000 22 0: 2fe768d4 -> 00000000 00000000 23 0: ea77997d -> 00000000 80000000 * 24 0: 349682a1 -> 00000000 00000000 25 0: 896f94e6 -> 00000000 80000000 * 26 0: f71d14ea -> 00000000 80000000 * 27 0: afe5cd24 -> 00000000 80000000 * 28 0: 4b357779 -> 00000000 00000000 29 0: e160e59e -> 00000000 80000000 * 30 0: cb17f230 -> 00000000 80000000 * 31 0: 34a72a94 -> 00000000 00000000 32 0: 63f53790 -> 00000000 00000000 33 0: c73912a6 -> 00000000 80000000 * 34 0: c77de534 -> 00000000 80000000 * 35 0: 46d4c24d -> 00000000 00000000 36 0: f85b0655 -> 00000000 80000000 * 37 0: 67f18b5f -> 00000000 00000000 38 0: c4b69146 -> 00000000 80000000 * 39 0: 6e446a8d -> 00000000 00000000 40 0: 44b0e7f7 -> 00000000 00000000 41 0: ab3f85e5 -> 00000000 80000000 * 42 0: 5ee85774 -> 00000000 00000000 43 0: f88eb353 -> 00000000 80000000 * 44 0: 43fa9c65 -> 00000000 00000000 45 0: 3e9af31a -> 00000000 00000000 46 0: f0fb5c8d -> 00000000 80000000 * 47 0: ef08f15d -> 00000000 80000000 * 48 0: af00a94d -> 00000000 80000000 * 49 0: 8c38879b -> 00000000 80000000 * 50 0: 95968221 -> 00000000 80000000 * 51 0: 1ec9372c -> 
00000000 00000000 52 0: 0449511f -> 00000000 00000000 53 0: be12b4c8 -> 00000000 80000000 * 54 0: 29206c7e -> 00000000 00000000 55 0: 10a959ff -> 00000000 00000000 56 0: 7c1d960b -> 00000000 00000000 57 0: c1deaebf -> 00000000 80000000 * 58 0: 45bf7eb3 -> 00000000 00000000 59 0: 0f3ee8d3 -> 00000000 00000000 60 0: 13813cfe -> 00000000 00000000 61 0: 40136760 -> 00000000 00000000 62 0: 455bd98f -> 00000000 00000000 63 0: 27e03868 -> 00000000 00000000 64 0: 843fc6a8 -> 00000000 80000000 * 65 0: a4e79249 -> 00000000 80000000 * 66 0: 459b62a3 -> 00000000 00000000 67 0: 23a55c3e -> 00000000 00000000 68 0: 49db7678 -> 00000000 00000000 69 0: 1a53a3db -> 00000000 00000000 I have pushed this commit to at least have a working volume element. If it turns out to be hard to fix, I'll disable the orc code. commit e4d33ef53baa5ae1d28743992e7f97635ac5411f Author: Stefan Kost <ensonic@users.sf.net> Date: Thu Aug 26 15:17:20 2010 +0300 volume: make the orc usage for float conditional again
Found it. commit 21cc9b3a1c36619644d640f94f375f25a6c715ac Author: David Schleef <ds@schleef.org> Date: Thu Aug 26 17:58:42 2010 -0700 c: Fix loading of float params Float params are loaded from orc_union32 pointers into orc_union32 temporaries, so it's important not to do a float->int conversion in there.