diff --git a/TRICKS b/TRICKS
index b56a38c..6883588 100644
--- a/TRICKS
+++ b/TRICKS
@@ -102,5 +102,11 @@
 after observing the earlier writes by CPU0.
 So any reads in B are guaranteed to observe the
 effects of writes in A.
 
-Not sure about the second one yet.
+According to the memory-ordering rules in the Intel
+manuals, the second condition requires a serializing
+instruction in release, to keep reads in A from moving
+down past the write that gives up lk.  No Intel SMP
+processor in existence actually reorders reads after
+writes, but the manuals allow it, and there is no
+telling whether future processors will need the barrier.
diff --git a/main.c b/main.c
index 2108d95..6ec6d19 100644
--- a/main.c
+++ b/main.c
@@ -50,8 +50,9 @@ mpmain(void)
   if(cpu() != mp_bcpu())
     lapic_init(cpu());
   setupsegs(0);
-  cpus[cpu()].booted = 1;
+  xchg(&cpus[cpu()].booted, 1);
 
+  cprintf("cpu%d: scheduling\n", cpu());
   scheduler();
 }
 
diff --git a/proc.h b/proc.h
index fa60452..502361d 100644
--- a/proc.h
+++ b/proc.h
@@ -56,7 +56,7 @@ struct cpu {
   struct context context;     // Switch here to enter scheduler
   struct taskstate ts;        // Used by x86 to find stack for interrupt
   struct segdesc gdt[NSEGS];  // x86 global descriptor table
-  volatile int booted;        // Has the CPU started?
+  volatile uint booted;       // Has the CPU started?
   int ncli;                   // Depth of pushcli nesting.
   int intena;                 // Were interrupts enabled before pushcli?
 };
diff --git a/spinlock.c b/spinlock.c
index c00c978..ba5bb4a 100644
--- a/spinlock.c
+++ b/spinlock.c
@@ -10,12 +10,6 @@
 
 extern int use_console_lock;
 
-// Barrier to gcc's instruction reordering.
-static void inline gccbarrier(void)
-{
-  asm volatile("" : : : "memory");
-}
-
 void
 initlock(struct spinlock *lock, char *name)
 {
@@ -35,7 +29,10 @@ acquire(struct spinlock *lock)
   if(holding(lock))
     panic("acquire");
 
-  while(cmpxchg(0, 1, &lock->locked) == 1)
+  // The xchg is atomic.
+  // It also serializes, so that reads after acquire are not
+  // reordered before it.
+  while(xchg(&lock->locked, 1) == 1)
     ;
 
   // Record info about lock acquisition for debugging.
@@ -56,8 +53,12 @@ release(struct spinlock *lock)
   lock->pcs[0] = 0;
   lock->cpu = 0xffffffff;
 
-  gccbarrier();  // Keep gcc from moving lock->locked = 0 earlier.
-  lock->locked = 0;
+  // The xchg serializes, so that reads before release are
+  // not reordered after it.  (This reordering would be allowed
+  // by the Intel manuals, but does not happen on current
+  // Intel processors.  The xchg being asm volatile also keeps
+  // gcc from delaying the above assignments.)
+  xchg(&lock->locked, 0);
 
   popcli();
 }
diff --git a/x86.h b/x86.h
index a1c66b5..1f2c881 100644
--- a/x86.h
+++ b/x86.h
@@ -96,35 +96,16 @@ write_eflags(uint eflags)
   asm volatile("pushl %0; popfl" : : "r" (eflags));
 }
 
-// XXX: Kill this if not used.
-static inline void
-cpuid(uint info, uint *eaxp, uint *ebxp, uint *ecxp, uint *edxp)
-{
-  uint eax, ebx, ecx, edx;
-
-  asm volatile("cpuid" :
-               "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) :
-               "a" (info));
-  if(eaxp)
-    *eaxp = eax;
-  if(ebxp)
-    *ebxp = ebx;
-  if(ecxp)
-    *ecxp = ecx;
-  if(edxp)
-    *edxp = edx;
-}
-
 static inline uint
-cmpxchg(uint oldval, uint newval, volatile uint* lock_addr)
+xchg(volatile uint *addr, uint newval)
 {
   uint result;
 
   // The + in "+m" denotes a read-modify-write operand.
-  asm volatile("lock; cmpxchgl %2, %0" :
-               "+m" (*lock_addr), "=a" (result) :
-               "r"(newval), "1"(oldval) :
-               "cc");
+  asm volatile("lock; xchgl %0, %1" :
+               "+m" (*addr), "=a" (result) :
+               "1" (newval) :
+               "cc");
   return result;
 }
 
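
For reference, a minimal standalone sketch of the locking pattern this
patch settles on, with xv6's debug bookkeeping (holding(), pcs[],
pushcli/popcli) stripped out.  The xchg below mirrors the one added to
x86.h; the global locked flag and the main() driver are illustrative
only, and the whole thing assumes gcc inline assembly on an x86 target.

    #include <stdio.h>

    // Atomic exchange, as in the patched x86.h: lock-prefixed xchgl
    // is atomic and acts as a full barrier on x86, so it serves both
    // the acquire side and the release side of the lock.
    static inline unsigned int
    xchg(volatile unsigned int *addr, unsigned int newval)
    {
      unsigned int result;

      // The + in "+m" denotes a read-modify-write operand.
      asm volatile("lock; xchgl %0, %1" :
                   "+m" (*addr), "=a" (result) :
                   "1" (newval) :
                   "cc");
      return result;
    }

    static volatile unsigned int locked;    // 0 = free, 1 = held

    static void
    acquire(void)
    {
      // Spin until the swap returns 0, i.e. we moved it free -> held.
      while(xchg(&locked, 1) == 1)
        ;
    }

    static void
    release(void)
    {
      // An xchg rather than a plain locked = 0: the serializing
      // instruction keeps the critical section's reads from being
      // reordered past the release, and the asm volatile keeps gcc
      // from moving the store.
      xchg(&locked, 0);
    }

    int
    main(void)
    {
      acquire();
      printf("in the critical section\n");
      release();
      return 0;
    }

Note that release pays for an xchg even though a plain store of 0
would unlock correctly on every Intel processor shipped so far; the
patch trades a few cycles for ordering the manuals actually promise,
which is the same reasoning the TRICKS hunk above records.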