negative pop3d worker count

Ken Murchison ken at oceana.com
Thu Apr 7 20:57:25 EDT 2005


Henrique de Moraes Holschuh wrote:

> On Thu, 07 Apr 2005, Jules Agee wrote:
> 
>>Henrique de Moraes Holschuh wrote:
>>
>>>I remember squashing a bug on Debian's 2.1 master code that caused it not 
>>>to
>>>process *all* available messages at every interaction (instead, it 
>>>processed
>>>only one).  This caused messages to pile up in busy sites and triggered the
>>>above issues.  As I said, I think 2.2 does not have this bug, but it is
>>>something that could be checked.
>>
>>Yup, and thanks very much for squashing that bug!
>>
>>It looks to me like 2.2 does indeed still have it, the master code still 
>>only processes one message in each service loop.
> 
> 
> Ok, here is the patch that fixed issues with Debian 2.1. It will *not* apply
> to 2.2, but it will give you the correct path to forward-port it.

Would one of you please bugzilla this with the patch below and any 
others that you might have, so Derrick and I won't forget about it?



> 
> 
> 
> ------------------------------------------------------------------------
> 
> ---------------------
> PatchSet 1840 
> Date: 2004/08/06 15:54:55
> Author: hmh
> Branch: HEAD
> Tag: (none) 
> Log:
>   * Time to remember dead children is now a command line option (-J)
>   * Make sure to process all pending messages of children of
>     a service in one go.  Thanks to Jules Agee <julesa at pcf.com> for
>     tracking down this problem, and testing the new code
>     (REQUIRED FOR HIGH-VOLUME SITES)
>   *** SYNC TO 2.1.16-0woody.6.2 ***
> 
> Members: 
> 	master/master.c:1.52->1.53 
> 
> Index: debian/cyrus21-imapd/master/master.c
> ===================================================================
> RCS file: /home/cvs/debian/cyrus21-imapd/master/master.c,v
> retrieving revision 1.52
> retrieving revision 1.53
> diff -u -r1.52 -r1.53
> --- debian/cyrus21-imapd/master/master.c	19 Dec 2003 09:29:22 -0000	1.52
> +++ debian/cyrus21-imapd/master/master.c	6 Aug 2004 15:54:55 -0000	1.53
> @@ -154,6 +154,8 @@
>  static struct centry *ctable[child_table_size];
>  static struct centry *cfreelist;
>  
> +static int child_mourning_time = 2;	/* Time in seconds to remember child
> +					   after processing SIGCHLD */
>  static int janitor_frequency = 1;	/* Janitor sweeps per second */
>  static int janitor_position;		/* Entry to begin at in next sweep */
>  static struct timeval janitor_mark;	/* Last time janitor did a sweep */
> @@ -884,16 +886,16 @@
>  		}
>  	    }
>  	    c->service_state = SERVICE_STATE_DEAD;
> -	    c->janitor_deadline = time(NULL) + 2;
> +	    c->janitor_deadline = time(NULL) + child_mourning_time;
>  	} else {
>  	    /* weird. Are we multithreaded now? we don't know this child */
>  	    syslog(LOG_WARNING,
> -		   "receiving signals from unregistered child %d. Handling it anyway",
> +		   "notified of death of unregistered child %d. Handling it anyway",
>  		   pid);
>  	    c = get_centry();
>  	    c->pid = pid;
>  	    c->service_state = SERVICE_STATE_DEAD;
> -	    c->janitor_deadline = time(NULL) + 2;
> +	    c->janitor_deadline = time(NULL) + child_mourning_time;
>  	    c->s = NULL;
>  	    c->next = ctable[pid % child_table_size];
>  	    ctable[pid % child_table_size] = c;
> @@ -1052,6 +1054,36 @@
>      }
>  }
>  
> +/*
> + * Receives a message from a service.
> + *
> + * Returns zero if all goes well
> + * 1 if no msg available
> + * 2 if bad message received (incorrectly sized)
> + * -1 on error (errno set)
> + */
> +int read_msg(int fd, struct notify_message *msg)
> +{
> +	ssize_t r;
> +	size_t off = 0;
> +	int s = sizeof(struct notify_message);
> +
> +	while (s > 0) {
> +	    do
> +		r = read(fd, msg + off, s);
> +	    while ((r == -1) && (errno == EINTR));
> +	    if (r <= 0) break;
> +	    s -= r;
> +	    off += r;
> +	}
> +	if ( ((r == 0) && (off == 0)) ||
> +	     ((r == -1) && (errno == EAGAIN)) )
> +	    return 1;
> +	if (r == -1) return -1;
> +	if (s != 0) return 2;
> +	return 0;
> +}
> +
>  void process_msg(struct service *s, struct notify_message *msg)
>  {
>      struct centry * c;
> @@ -1655,7 +1687,7 @@
>  
>      p = getenv("CYRUS_VERBOSE");
>      if (p) verbose = atoi(p) + 1;
> -    while ((opt = getopt(argc, argv, "p:l:Ddj:")) != EOF) {
> +    while ((opt = getopt(argc, argv, "p:l:Ddj:J:")) != EOF) {
>  	switch (opt) {
>  	case 'l':
>              /* user defined listen queue backlog */
> @@ -1681,7 +1713,15 @@
>  	    /* Janitor frequency */
>  	    janitor_frequency = atoi(optarg);
>  	    if(janitor_frequency < 1)
> -		fatal("The janitor period must be at least 1 second", EX_CONFIG);
> +		fatal("The janitor frequency must be at least once per second",
> +			EX_CONFIG);
> +	    break;
> +	case 'J':
> +	    /* Janitor delay before cleanup of a child */
> +	    child_mourning_time = atoi(optarg);
> +	    if(child_mourning_time < 1)
> +		fatal("The janitor's mourning time interval must be at least 1 second", 
> +			EX_CONFIG);
>  	    break;
>  	default:
>  	    break;
> @@ -2016,13 +2056,19 @@
>  	    int j;
>  
>  	    if (FD_ISSET(x, &rfds)) {
> -		r = read(x, &msg, sizeof(msg));
> -		if (r != sizeof(msg)) {
> -		    syslog(LOG_ERR, "got incorrectly sized response from child: %x", i);
> +		while ((r = read_msg(x, &msg)) == 0)
> +		    process_msg(&Services[i], &msg);
> +
> +		if (r == 2) {
> +		    syslog(LOG_ERR, 
> +			"got incorrectly sized response from child: %x", i);
> +		    continue;
> +		}
> +		if (r < 0) {
> +		    syslog(LOG_ERR,
> +			"error while receiving message from child %x: %m", i);
>  		    continue;
>  		}
> -
> -		process_msg(&Services[i], &msg);
>  	    }
>  
>  	    if (Services[i].exec &&
> ---------------------
> PatchSet 1841 
> Date: 2004/08/06 17:50:00
> Author: hmh
> Branch: HEAD
> Tag: (none) 
> Log:
>   * Time to remember dead children is now a command line option (-J)
> 
> Members: 
> 	man/master.8:1.5->1.6 
> 
> Index: debian/cyrus21-imapd/man/master.8
> ===================================================================
> RCS file: /home/cvs/debian/cyrus21-imapd/man/master.8,v
> retrieving revision 1.5
> retrieving revision 1.6
> diff -u -r1.5 -r1.6
> --- debian/cyrus21-imapd/man/master.8	25 Mar 2003 14:37:33 -0000	1.5
> +++ debian/cyrus21-imapd/man/master.8	6 Aug 2004 17:50:04 -0000	1.6
> @@ -54,7 +54,11 @@
>  ]
>  [
>  .B \-j
> -.I janitor period
> +.I janitor frequency
> +]
> +[
> +.B \-J
> +.I janitor's mourning time interval
>  ]
>  [
>  .B \-d
> @@ -86,6 +90,12 @@
>  fork rate (and you have not increased the child hash table size when you
>  compiled Cyrus from its default of 10000 entries).
>  .TP
> +.BI \-J " janitor mourining time interval"
> +Sets the amount of time, in seconds, to remember a child process after
> +it exits.  You only need to increase this from the default of 2 seconds,
> +if you see messages in syslog complaining of messages from unknown or
> +unregistered children.
> +.TP
>  .BI \-p " pidfile"
>  Use
>  .I pidfile


-- 
Kenneth Murchison     Oceana Matrix Ltd.
Software Engineer     21 Princeton Place
716-662-8973 x26      Orchard Park, NY 14127
--PGP Public Key--    http://www.oceana.com/~ken/ksm.pgp
---
Cyrus Home Page: http://asg.web.cmu.edu/cyrus
Cyrus Wiki/FAQ: http://cyruswiki.andrew.cmu.edu
List Archives/Info: http://asg.web.cmu.edu/cyrus/mailing-list.html




More information about the Info-cyrus mailing list