[AD] timer_mutex again

[ Thread Index | Date Index | More lists.liballeg.org/allegro-developers Archives ]


On the debug, C-only, X11 port of Allegro I can easily get a deadlock when grabber starts up. At some stage, show_mouse(NULL) is called, which calls remove_int(), which locks the timer_mutex in timer.c, but the timer_mutex is already locked by _handle_timer_tick() while it tries to fire off timer callback functions. Sound familiar?

I think _handle_timer_tick() doesn't actually need to lock the timer_mutex for its duration. As far as I can tell, it's only to prevent install_int() and remove_int() calls from messing about with the _timer_queue global structure (which contains the list of registered timer callbacks) that could, e.g. cause _handle_timer_tick() to derefence a NULL pointer. So my proposed fix for _handle_timer_tick() is to:

1. lock the timer_mutex for a short period only, make a local copy of the _timer_queue, unlock the mutex 2. run the timer callbacks off the local_timer_queue, make updates only to the local_timer_queue 3. lock the timer_mutex again; reflect changes in the local copy to the global _timer_queue; unlock the mutex

"Changes in the local copy" should only be reflected if a slot in the _timer_queue structure still contains the same callback (and parameters) that was there at step 1. To do this, I added a variable to the TIMER_QUEUE structure so that each slot has an integer called "marker". Whenever a callback is added or removed from that slot, the integer is incremented. Then if a slot in the _timer_queue was changed in step 2, we can know that by comparing that with the "marker" in the local copy.[1]

I hope that made sense. It's not pretty nor lightweight and I probably went wrong somewhere. A minimally-tested patch is attached.

Peter

[1] I might have overdone this. We could just compare the _timer_queue[i].proc, .param_proc and .param fields instead of a new "marker" field, though that would not catch the case of remove_int(cb) followed immediately by install_int(cb) if both were done during step 2.
Index: include/allegro/internal/aintern.h
===================================================================
RCS file: /cvsroot/alleg/allegro/include/allegro/internal/aintern.h,v
retrieving revision 1.34
diff -u -r1.34 aintern.h
--- include/allegro/internal/aintern.h	7 Apr 2005 22:37:20 -0000	1.34
+++ include/allegro/internal/aintern.h	17 Apr 2005 09:47:18 -0000
@@ -145,6 +145,11 @@
    void *param;                        /* param for param_proc if used */
    long speed;                         /* timer speed */
    long counter;                       /* counts down to zero=blastoff */
+#ifdef ALLEGRO_MULTITHREADED
+   int marker;                         /* incremented when a procedure
+					* is installed or removed from
+					* this slot, and never reset */
+#endif
 } TIMER_QUEUE;
 
 AL_ARRAY(TIMER_QUEUE, _timer_queue);
Index: src/timer.c
===================================================================
RCS file: /cvsroot/alleg/allegro/src/timer.c,v
retrieving revision 1.20
diff -u -r1.20 timer.c
--- src/timer.c	14 Mar 2005 11:41:59 -0000	1.20
+++ src/timer.c	17 Apr 2005 09:47:21 -0000
@@ -61,23 +61,116 @@
 /* _handle_timer_tick:
  *  Called by the driver to handle a timer tick.
  */
+#ifdef ALLEGRO_MULTITHREADED
+
 long _handle_timer_tick(int interval)
 {
+   TIMER_QUEUE local_timer_queue[MAX_TIMERS];
+   long delta_counter[MAX_TIMERS];
    long new_delay = 0x8000;
    long d;
    int i;
 
+   ASSERT(sizeof(local_timer_queue) == sizeof(_timer_queue));
+
    timer_delay += interval;
 
-#ifdef ALLEGRO_MULTITHREADED
+   /* In multithreaded mode, the worry is having _timer_queue change
+    * behind our back while the registered callbacks are still being
+    * called.  We tried to solve this by locking the timer_mutex while
+    * the callbacks were firing, but that led to many deadlocks (in
+    * particular with the mouse code).  Instead we lock the
+    * timer_mutex just long enough to make a copy of the _timer_queue,
+    * then work with the copy for the rest of the function.
+    */
+   system_driver->lock_mutex(timer_mutex);
+
+   for (i=0; i<MAX_TIMERS; i++) {
+      local_timer_queue[i] = _timer_queue[i];
+      delta_counter[i] = 0;
+   }
+
+   system_driver->unlock_mutex(timer_mutex);
+
+   d = timer_delay;
+
+   /* deal with retrace synchronisation */
+   vsync_counter -= d; 
+
+   while (vsync_counter <= 0) {
+      vsync_counter += _vsync_speed;
+      retrace_count++;
+      if (retrace_proc)
+	 retrace_proc();
+   }
+
+   /* process the user callbacks */
+   for (i=0; i<MAX_TIMERS; i++) { 
+      if (((local_timer_queue[i].proc) || (local_timer_queue[i].param_proc)) &&
+	  (local_timer_queue[i].speed > 0)) {
+
+	 local_timer_queue[i].counter -= d;
+	 delta_counter[i] -= d;
+
+	 while ((local_timer_queue[i].counter <= 0) && 
+		((local_timer_queue[i].proc) || (local_timer_queue[i].param_proc)) && 
+		(local_timer_queue[i].speed > 0)) {
+	    local_timer_queue[i].counter += local_timer_queue[i].speed;
+	    delta_counter[i] += local_timer_queue[i].speed;
+	    if (local_timer_queue[i].param_proc)
+	       local_timer_queue[i].param_proc(local_timer_queue[i].param);
+	    else
+	       local_timer_queue[i].proc();
+	 }
+
+	 if ((local_timer_queue[i].counter > 0) && 
+	     ((local_timer_queue[i].proc) || (local_timer_queue[i].param_proc)) && 
+	     (local_timer_queue[i].counter < new_delay)) {
+	    new_delay = local_timer_queue[i].counter;
+	 }
+      }
+   }
+
+   timer_delay -= d;
+
+   /* Update _timer_queue to reflect the changes we made to the
+    * `counter' fields, if the `marker' fields are different.  If so,
+    * that means a different proc is occupying the slot, compared to
+    * at the start of the procedure.
+    */
    system_driver->lock_mutex(timer_mutex);
-#else
+
+   for (i=0; i<MAX_TIMERS; i++) {
+      if (local_timer_queue[i].marker == _timer_queue[i].marker)
+	 _timer_queue[i].counter += delta_counter[i];
+   }
+
+   system_driver->unlock_mutex(timer_mutex);
+
+#ifdef ALLEGRO_WINDOWS
+   /* fudge factor to prevent interrupts from coming too close to each other */
+   if (new_delay < MSEC_TO_TIMER(1))
+      new_delay = MSEC_TO_TIMER(1);
+#endif
+
+   return new_delay;
+}
+
+#else  /* !ALLEGRO_MULTITHREADED */
+
+long _handle_timer_tick(int interval)
+{
+   long new_delay = 0x8000;
+   long d;
+   int i;
+
+   timer_delay += interval;
+
    /* reentrant interrupt? */
    if (timer_semaphore)
       return 0x2000;
 
    timer_semaphore = TRUE;
-#endif
 
    d = timer_delay;
 
@@ -118,21 +212,13 @@
 
    timer_delay -= d;
 
-#ifdef ALLEGRO_MULTITHREADED
-   system_driver->unlock_mutex(timer_mutex);
-#else
    timer_semaphore = FALSE;
-#endif
-
-#ifdef ALLEGRO_WINDOWS
-   /* fudge factor to prevent interrupts from coming too close to each other */
-   if (new_delay < MSEC_TO_TIMER(1))
-      new_delay = MSEC_TO_TIMER(1);
-#endif
 
    return new_delay;
 }
 
+#endif /* !ALLEGRO_MULTITHREADED */
+
 END_OF_FUNCTION(_handle_timer_tick);
 
 
@@ -347,6 +433,10 @@
       }
       else
 	 _timer_queue[x].proc = proc;
+
+#ifdef ALLEGRO_MULTITHREADED
+      _timer_queue[x].marker++;
+#endif
    }
 
    _timer_queue[x].speed = speed;
@@ -452,6 +542,8 @@
    _timer_queue[x].counter = 0;
 
 #ifdef ALLEGRO_MULTITHREADED
+   _timer_queue[x].marker++;
+
    system_driver->unlock_mutex(timer_mutex);
 #endif
 }


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/