Missing volatile in hard_smp_processor_id.
Gilles Chanteperdrix
gilles.chanteperdrix at xenomai.org
Mon Jun 27 19:03:41 EDT 2011
Hi,
All versions of the CodeSourcery toolchain from 2009q1 to 2010.09 optimize
calls to hard_smp_processor_id() too eagerly.
The following code:
/*
 * hard_smp_processor_id() - read the current CPU number from coprocessor 15.
 *
 * Executes "mrc p15, 0, <reg>, c0, c0, 5" into a local, masks the result
 * with 0x0F and yields that value as the result of the statement expression.
 *
 * The address of the mrc instruction (label 1) is recorded in the
 * ".alt.smp.init" section together with a "mov <reg>, #0" instruction;
 * presumably this lets the mrc be patched at boot -- the consumer of that
 * section is not shown here.
 *
 * NOTE(review): the asm statement is not declared volatile and has no input
 * operands, so the compiler is allowed to treat it as a pure computation and
 * reuse an earlier result instead of executing the mrc again. That is the
 * behavior this reproducer demonstrates.
 */
#define hard_smp_processor_id() \
({ \
unsigned int cpunum; \
__asm__("\n" \
"1: mrc p15, 0, %0, c0, c0, 5\n" \
" .pushsection \".alt.smp.init\", \"a\"\n"\
" .long 1b\n" \
" mov %0, #0\n" \
" .popsection" \
: "=r" (cpunum)); \
cpunum &= 0x0F; \
})
/*
 * arch_local_irq_save() - save the CPSR and mask IRQs.
 *
 * Reads the CPSR into a local with "mrs", then executes "cpsid i".
 * The asm is volatile and lists "memory" and "cc" as clobbers, so the
 * compiler can neither drop it nor move memory accesses across it.
 *
 * Returns the CPSR value read before "cpsid i" was executed; pass it to
 * arch_local_irq_restore() to undo the change.
 */
static inline unsigned long arch_local_irq_save(void)
{
unsigned long flags;
asm volatile(
" mrs %0, cpsr @ arch_local_irq_save\n"
" cpsid i"
: "=r" (flags) : : "memory", "cc");
return flags;
}
/*
 * arch_local_irq_restore() - write a previously saved value back to cpsr_c.
 *
 * @flags: value to load into cpsr_c with "msr"; in this file it is always
 *         the value returned by arch_local_irq_save().
 *
 * The asm is volatile and lists "memory" and "cc" as clobbers, so the
 * compiler can neither drop it nor move memory accesses across it.
 */
static inline void arch_local_irq_restore(unsigned long flags)
{
asm volatile(
" msr cpsr_c, %0 @ local_irq_restore"
:
: "r" (flags)
: "memory", "cc");
}
/*
 * Cut-down per-thread record used by this reproducer.
 * Member order is significant: f() reads the cpu member through a
 * pointer returned by current_thread_info().
 */
struct thread_info {
	unsigned long	flags;		/* low-level flag bits */
	int		preempt_count;	/* 0: preemptable, negative: bug */
	unsigned int	cpu;		/* CPU number stored for this thread */
};
/*
 * current_thread_info() - derive a thread_info pointer from the stack pointer.
 *
 * Binds a local to the "sp" register, clears its low 13 bits (sp & ~8191)
 * and returns the result cast to struct thread_info *. Presumably the
 * thread_info lives at the base of an 8 KiB-aligned stack -- the stack
 * layout is not shown here.
 *
 * The prototype carries __attribute__((const)), which tells the compiler
 * that repeated calls yield the same value, so it may reuse the result of
 * an earlier call instead of calling again.
 */
static inline struct thread_info *current_thread_info(void) __attribute__((const));
static inline struct thread_info *current_thread_info(void)
{
register unsigned long sp asm ("sp");
return (struct thread_info *)(sp & ~8191);
}
/* printf-style logging routine, defined elsewhere. */
extern int printk(const char *fmt, ...);
/*
 * f() - reproducer: sample the CPU number twice, each time with IRQs masked.
 *
 * Each sample is taken between arch_local_irq_save() and
 * arch_local_irq_restore(): cpu0 comes from hard_smp_processor_id(), cpu1
 * from current_thread_info()->cpu. IRQ state is restored before printing,
 * so there is a window between the two samples in which IRQs are not masked
 * by this function.
 */
void f(void)
{
unsigned long flags;
unsigned cpu0, cpu1;
/* First sample. */
flags = arch_local_irq_save();
cpu0 = hard_smp_processor_id();
cpu1 = current_thread_info()->cpu;
arch_local_irq_restore(flags);
printk("cpu %d, %d\n", cpu0, cpu1);
/* Second sample: written as an independent re-read of both values. */
flags = arch_local_irq_save();
cpu0 = hard_smp_processor_id();
cpu1 = current_thread_info()->cpu;
arch_local_irq_restore(flags);
/*
 * NOTE(review): this format string has a single %d but two values are
 * passed, so cpu1 is never printed here. Both cpu0 and cpu1 are unsigned
 * while the conversions are %d.
 */
printk("cpu %d\n", cpu0, cpu1);
}
gives:
00000024 <f>:
24: e92d41f0 push {r4, r5, r6, r7, r8, lr}
28: ebfffff4 bl 0 <arch_local_irq_save>
2c: ee104fb0 mrc 15, 0, r4, cr0, cr0, {5}
30: e204400f and r4, r4, #15
34: e1a07000 mov r7, r0
38: ebfffff5 bl 14 <current_thread_info>
3c: e5906008 ldr r6, [r0, #8]
40: e1a05000 mov r5, r0
44: e1a00007 mov r0, r7
48: ebffffef bl c <arch_local_irq_restore>
4c: e1a01004 mov r1, r4
50: e1a02006 mov r2, r6
54: e59f0020 ldr r0, [pc, #32] ; 7c <f+0x58>
58: ebfffffe bl 0 <printk>
5c: ebffffe7 bl 0 <arch_local_irq_save>
60: e5955008 ldr r5, [r5, #8]
64: ebffffe8 bl c <arch_local_irq_restore>
68: e59f0010 ldr r0, [pc, #16] ; 80 <f+0x5c>
6c: e1a01004 mov r1, r4
70: e1a02005 mov r2, r5
74: e8bd41f0 pop {r4, r5, r6, r7, r8, lr}
78: eafffffe b 0 <printk>
7c: 00000000 .word 0x00000000
80: 0000000c .word 0x0000000c
Here we see that the CPU number is read from cp15 only once: the value
kept in r4 is reused for the second printk. This is problematic, as a task
may migrate to another CPU as soon as IRQs are re-enabled.
Adding a "volatile" to the inline assembly in hard_smp_processor_id()
fixes it.
I know hard_smp_processor_id() disappeared after Linux 2.6.37, but this
may be interesting for the stable and long term support branches. This
issue is suspected to cause random segmentation faults on OMAP4 with
CONFIG_HIGHMEM on.
Regards.
--
Gilles.
More information about the linux-arm-kernel
mailing list