ftp.delorie.com/archives/browse.cgi   search  
Mail Archives: djgpp-workers/2002/10/16/07:13:50

Sender: rich AT phekda DOT freeserve DOT co DOT uk
Message-ID: <3DAD4230.B4F990C4@phekda.freeserve.co.uk>
Date: Wed, 16 Oct 2002 11:40:48 +0100
From: Richard Dawe <rich AT phekda DOT freeserve DOT co DOT uk>
X-Mailer: Mozilla 4.77 [en] (X11; U; Linux 2.2.19 i586)
X-Accept-Language: de,fr
MIME-Version: 1.0
To: djgpp-workers AT delorie DOT com
Subject: Re: libc' getenv optimization (patch2)
References: <10210150631 DOT AA20605 AT clio DOT rice DOT edu> <2 DOT 7 DOT 9 DOT 1C51H DOT H41539 AT pauzner DOT dnttm DOT ru>
Reply-To: djgpp-workers AT delorie DOT com

Hello.

Leonid Pauzner wrote:
[snip]
> This is the revised patch, I choose 16 bucket.
> 
> diff -u -p old/getenv.c ./getenv.c
> --- old/getenv.c        Fri Nov 24 22:21:18 1995
> +++ ./getenv.c  Tue Oct 15 19:23:14 2002
> @@ -1,20 +1,41 @@
>  /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
>  #include <stdlib.h>
>  #include <string.h>
> +#include <libc/environ.h>
> 
>  extern char **environ;
> 
> +/*
> +   getenv optimization: we copy environ pointers into 16 (or 32) lists,
> +   associated with the mask of the first letter of the text string.
> +   This will speedup getenv by the factor of 4, or like.
> +   Reset hash_env[] if environ was changed since the previous getenv call.
> +*/
> +#define BUCKETS  16
> +
> +char **hash_env[BUCKETS] = {}; /* init with NULLs*/
> +void set_hash_env(void);
> +

These should be declared as static, to avoid polluting the namespace when
people use getenv.

>  char *
>  getenv(const char *name)
>  {
> -  int i;
> +  static unsigned last_env_changed = 0;
> +  char **envi;
> +
> +  if (last_env_changed != __environ_changed)
> +  {
> +    last_env_changed = __environ_changed;
> +    set_hash_env();
> +  }
> 
> -  if (environ == 0)
> +  envi = hash_env[(unsigned char)*name % BUCKETS];
> +  if (envi == 0)
>      return 0;
> 
> -  for (i=0; environ[i]; i++)
> +  envi++;                 /* envi[0] is reserved */
> +  for ( ; *envi; envi++)
>    {
> -    char *ep = environ[i];
> +    char *ep = *envi;
>      const char *np = name;
>      while (*ep && *np && *ep == *np && *np != '=')
>        ep++, np++;
> @@ -22,4 +43,61 @@ getenv(const char *name)
>        return ep+1;
>    }
>    return 0;
> +}
> +
> +
> +void
> +set_hash_env(void)
> +{
> +  char **env = environ;
> +  int size[BUCKETS];
> +  int i;
> +  int alloc;
> +
> +  for (i=0; i < BUCKETS; i++)
> +    size[i] = 0;
> +  while (*env)
> +  {
> +    i = (unsigned char)**env % BUCKETS;
> +    size[i]++;
> +    env++;
> +  }
> +
> +  /* (re)allocation:  to avoid too many small realloc's
> +   * we reserve  hash_env[i][0]  for allocated size info, using a cast.
> +   * Do not forget it in this file to avoid off-by-one error.
> +   */

Aargh. I hate it when people use pointers to store ints and vice-versa. What's
wrong with having a separate array to hold the size information? It would make
the code more readable too.

> +  for (i=0; i < BUCKETS; i++)
> +  {
> +    if (size[i])
> +    {
> +      if (!hash_env[i]  ||  (int)hash_env[i][0] < size[i] + 2)
> +      {
> +        alloc = 2 * size[i] + 2;  /* x2 step */
> +        hash_env[i] = (char **)realloc(hash_env[i], (alloc) * sizeof(char *));

You shouldn't cast the return value of malloc, calloc and realloc. That can
hide errors, if you forget to include <stdlib.h>.

> +        hash_env[i][0] = (char *)alloc;
> +        hash_env[i][size[i]+1] = 0;
> +      }
> +      else
> +        hash_env[i][size[i]+1] = 0;
> +    }
> +    else
> +    {
> +      if (hash_env[i])
> +      {
> +        free(hash_env[i]);
> +        hash_env[i] = 0;
> +      }
> +    }
> +  }
> +  /* (re)allocation done */
> +
> +  env = environ;
> +  while (*env)
> +  {
> +    i = (unsigned char)**env % BUCKETS;
> +    hash_env[i][size[i]] = *env;
> +    size[i]--;
> +    env++;
> +  }
>  }

Bye, Rich =]

-- 
Richard Dawe [ http://www.phekda.freeserve.co.uk/richdawe/ ]

- Raw text -


  webmaster     delorie software   privacy  
  Copyright 2019   by DJ Delorie     Updated Jul 2019