Missing volatile in hard_smp_processor_id.

Gilles Chanteperdrix gilles.chanteperdrix at xenomai.org
Mon Jun 27 19:03:41 EDT 2011


All versions of the CodeSourcery toolchain from 2009q1 to 2010.09 optimize
calls to hard_smp_processor_id() too eagerly.

The following code:
/*
 * hard_smp_processor_id() - read the CPU number from CP15 (c0, c0, 5) and
 * keep its low four bits.
 *
 * The address of the mrc instruction and a replacement "mov %0, #0" are
 * emitted into the ".alt.smp.init" section (presumably so the read can be
 * patched out on uniprocessor systems -- confirm against the kernel source).
 *
 * NOTE: the asm statement is intentionally shown WITHOUT "volatile". With
 * only an output operand and no volatile, the compiler may treat it as a
 * pure computation and reuse an earlier result; that is the behaviour this
 * message reports.
 */
#define hard_smp_processor_id()                                         \
	({                                                              \
		unsigned int cpunum;                                    \
		__asm__("\n"                                            \
			"1:     mrc p15, 0, %0, c0, c0, 5\n"            \
			"       .pushsection \".alt.smp.init\", \"a\"\n"\
			"       .long   1b\n"                           \
			"       mov     %0, #0\n"                       \
			"       .popsection"                            \
			: "=r" (cpunum));                               \
		cpunum &= 0x0F;                                         \
	})

static inline unsigned long arch_local_irq_save(void)
	unsigned long flags;

	asm volatile(
		"       mrs     %0, cpsr        @ arch_local_irq_save\n"
		"       cpsid   i"
		: "=r" (flags) : : "memory", "cc");
	return flags;

static inline void arch_local_irq_restore(unsigned long flags)
	asm volatile(
		"       msr     cpsr_c, %0      @ local_irq_restore"
		: "r" (flags)
		: "memory", "cc");

/*
 * Reduced thread_info used by the test case; only the cpu field is read
 * (by f(), through current_thread_info()).
 */
struct thread_info {
	unsigned long flags;          /* low level flags */
	int           preempt_count;  /* 0 => preemptable, <0 => bug */
	unsigned      cpu;            /* cpu */
};

static inline struct thread_info *current_thread_info(void) __attribute__((const));

static inline struct thread_info *current_thread_info(void)
	register unsigned long sp asm ("sp");
	return (struct thread_info *)(sp & ~8191);

/* printf-style output routine; declared here, defined elsewhere. */
extern int printk(const char *fmt, ...);

void f(void)
	unsigned long flags;
	unsigned cpu0, cpu1;

	flags = arch_local_irq_save();
	cpu0 = hard_smp_processor_id();
	cpu1 = current_thread_info()->cpu;
	printk("cpu %d, %d\n", cpu0, cpu1);

	flags = arch_local_irq_save();
	cpu0 = hard_smp_processor_id();
	cpu1 = current_thread_info()->cpu;
	printk("cpu %d\n", cpu0, cpu1);

00000024 <f>:
  24:   e92d41f0        push    {r4, r5, r6, r7, r8, lr}
  28:   ebfffff4        bl      0 <arch_local_irq_save>
  2c:   ee104fb0        mrc     15, 0, r4, cr0, cr0, {5}
  30:   e204400f        and     r4, r4, #15
  34:   e1a07000        mov     r7, r0
  38:   ebfffff5        bl      14 <current_thread_info>
  3c:   e5906008        ldr     r6, [r0, #8]
  40:   e1a05000        mov     r5, r0
  44:   e1a00007        mov     r0, r7
  48:   ebffffef        bl      c <arch_local_irq_restore>
  4c:   e1a01004        mov     r1, r4
  50:   e1a02006        mov     r2, r6
  54:   e59f0020        ldr     r0, [pc, #32]   ; 7c <f+0x58>
  58:   ebfffffe        bl      0 <printk>
  5c:   ebffffe7        bl      0 <arch_local_irq_save>
  60:   e5955008        ldr     r5, [r5, #8]
  64:   ebffffe8        bl      c <arch_local_irq_restore>
  68:   e59f0010        ldr     r0, [pc, #16]   ; 80 <f+0x5c>
  6c:   e1a01004        mov     r1, r4
  70:   e1a02005        mov     r2, r5
  74:   e8bd41f0        pop     {r4, r5, r6, r7, r8, lr}
  78:   eafffffe        b       0 <printk>
  7c:   00000000        .word   0x00000000
  80:   0000000c        .word   0x0000000c

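In the disassembly above, the cpu number is read only once from cp15 (the
single mrc at offset 0x2c), and the value cached in r4 is reused for the
second printk. This is problematic, as a task may migrate as soon as irqs
are on.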

Adding a "volatile" to the inline assembly in hard_smp_processor_id()
fixes it.

I know hard_smp_processor_id() disappeared after Linux 2.6.37, but this
may be of interest for the stable and long-term support branches. This
issue is suspected to cause random segmentation faults on OMAP4 with
[rest of sentence truncated in the archive]



More information about the linux-arm-kernel mailing list