Mail Archives: djgpp/1997/01/23/12:09:31
On Thu, 23 Jan 1997, Jordy Potman wrote:
> be compared at all and that version 2.01 seems to be faster. But I'm
> still sure that the v2.01 version is a lot slower, because you notice
> it when running both versions. The v2.01 version responds slower to the
> keyboard commands and updates the screen at a lower frame rate.
> This only happens when I use the -pg switch with compiling and linking,
> without profiling the v2.01 version runs fine.
There might be a reason for this which is totally unrelated to your
program. Recently, a bug has been discovered in a library function which
is only linked in when you compile with -pg. This bug causes a call to
another library function (`_mono_printf') which tries to write to the
(probably absent) extra mono display. On some configurations, this will
just crash the program being profiled, but if you run on a machine where
the relevant addresses aren't remapped by the memory manager (to make
UMB possible), then the program might run. However, I can imagine a
configuration where this bug slows down the program instead of crashing
it. Since this bug was introduced with v2.01, it might be the cause of
what you see.
I attach below a corrected source for the library function in question.
Compile it, put it into your libc.a and see if that helps. Here's what
you should do:
gcc -Wall -c -O3 mcount.c
ar rvs c:/djgpp/lib/libc.a mcount.o
(change the pathname of libc.a as appropriate for your installation).
Then relink your program with -pg and try again.
----------------------------- mcount.c ---------------------------------
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
#include <libc/stubs.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <signal.h>
#include <setjmp.h>
#include <sys/time.h>
#include <sys/exceptn.h>
#include <sys/mono.h>
/* header of a GPROF type file
**
** One instance (`h') is filled in by _mcount_init and written verbatim
** as the first bytes of gmon.out by mcount_write.
*/
typedef struct {
long low;     /* lowest profiled address (set to START) */
long high;    /* highest profiled address (set to the address of etext) */
long nbytes;  /* sizeof(header) plus the histogram size in bytes */
} header;
/* entry of a GPROF type file
**
** One call arc: how many times `from' called `to'.  A `from' of zero
** marks an unused slot (tables are zero-filled when allocated).
*/
typedef struct {
unsigned long from;   /* address in the caller, as gleaned from the stack by mcount */
unsigned long to;     /* address of the callee, as computed by mcount */
unsigned long count;  /* number of times this from/to pair was seen */
} MTABE;
/* internal form - sizeof(MTAB) is 4096 for efficiency
**
** (341 entries of 12 bytes each plus one 4-byte pointer; this assumes
** 4-byte longs and pointers - mcount_write also relies on 12-byte entries.)
** Tables are chained newest-first through `prev'.
*/
typedef struct MTAB {
MTABE calls[341];     /* call arcs, filled from index 0 upwards */
struct MTAB *prev;    /* previously allocated table, or 0 for the oldest one */
} MTAB;
/* file header, filled in by _mcount_init and written by mcount_write */
static header h;
/* PC-sampling histogram: one short counter per 4 bytes of profiled code */
static short *histogram;
/* while non-zero, mcount_tick ignores timer ticks; starts out set so no
** samples are taken before _mcount_init has finished */
static int mcount_skip = 1;
/* size of the histogram in bytes */
static int histlen;
/* newest call table; older ones are reached through ->prev */
static MTAB *mtab=0;
/* only the address of etext is used: it is the upper bound of the profiled range */
extern int etext;
/* called by functions.  Use the pointer it provides to cache
** the last used MTABE, so that repeated calls to/from the same
** pair works quickly - no lookup.
**
** Records one call arc (from -> to) in the call tables:
**  - the per-call-site cache pointer is read from %edx on entry;
**  - `to' and `from' are read from the stack relative to the address of
**    the `_to' argument, so this function depends on the exact stack
**    frame layout and must not be restructured casually;
**  - mcount_skip is held non-zero for the duration so that mcount_tick
**    takes no samples while the tables are being updated.
**
** If a new table is needed and sbrk() cannot supply it, the arc is
** silently dropped instead of writing through the failure value.
*/
void mcount(int _to);
void mcount(int _to)
{
  MTAB *m;
  int i;
  int to;
  int ebp;
  int from;
  int mtabi;
  MTABE **cache;

  /* deliberately fault if the argument lives below etext (presumably a
  ** check that the stack has not run down into the program image -
  ** TODO confirm) */
  if (&_to < &etext)
    *(int *)(-1) = 0; /* fault! */

  mcount_skip = 1;
  asm("movl %%edx,%0" : "=g" (cache)); /* obtain the cached pointer */
  /* word just below the argument, minus 12 (presumably the return address
  ** into the profiled function, rewound to its entry - TODO confirm) */
  to = *((&_to)-1) - 12;
  ebp = *((&_to)-2); /* glean the caller's return address from the stack */
  from = ((int *)ebp)[1];

  if (*cache && ((*cache)->from == from) && ((*cache)->to == to))
  {
    /* cache paid off - works quickly */
    (*cache)->count++;
    mcount_skip = 0;
    return;
  }

  /* no cache hit - search all mtab tables for a match, or an empty slot */
  mtabi = -1;
  for (m=mtab; m; m=m->prev)
  {
    for (i=0; i<(int)(sizeof(m->calls)/sizeof(m->calls[0])); i++)
    {
      if (m->calls[i].from == 0)
      {
        /* empty slot - end of table */
        mtabi = i;
        break;
      }
      if ((m->calls[i].from == from) &&
          (m->calls[i].to == to))
      {
        /* found a match - bump count and return */
        m->calls[i].count ++;
        *cache = m->calls + i;
        mcount_skip = 0;
        return;
      }
    }
  }

  if (mtabi != -1)
  {
    /* found an empty - fill it in (the slot is taken from the newest table) */
    mtab->calls[mtabi].from = from;
    mtab->calls[mtabi].to = to;
    mtab->calls[mtabi].count = 1;
    *cache = mtab->calls + mtabi;
    mcount_skip = 0;
    return;
  }

  /* lob off another page of memory and initialize the new table */
  m = (MTAB *)sbrk(sizeof(MTAB));
  if (m == (MTAB *)-1)
  {
    /* out of memory - drop this arc rather than crash the program */
    mcount_skip = 0;
    return;
  }
  memset(m, 0, sizeof(MTAB));
  m->prev = mtab;
  mtab = m;
  m->calls[0].from = from;
  m->calls[0].to = to;
  m->calls[0].count = 1;
  *cache = m->calls;
  mcount_skip = 0;
}
/* this is called during program exit (installed by atexit).
**
** Stops the profiling timer and dumps the collected data to the file
** "gmon.out": first the header, then the PC histogram, then the used
** entries of every call table (newest table first).  If the file cannot
** be created nothing is written.  Individual write() failures are not
** reported - this runs at exit and is best-effort.
*/
static void
mcount_write(void)
{
  MTAB *m;
  int i, f;
  struct itimerval new_values;

  /* make mcount_tick ignore any tick that still arrives */
  mcount_skip = 1;

  /* disable timer */
  new_values.it_value.tv_usec = new_values.it_interval.tv_usec = 0;
  new_values.it_value.tv_sec = new_values.it_interval.tv_sec = 0;
  setitimer(ITIMER_PROF, &new_values, NULL);

  f = open("gmon.out", O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0666);
  if (f < 0)
    return; /* could not create the output file */

  write(f, &h, sizeof(header));
  write(f, histogram, histlen);

  for (m=mtab; m; m=m->prev)
  {
    /* slots are filled front to back; the first empty one ends the table */
    for (i=0; i<(int)(sizeof(m->calls)/sizeof(m->calls[0])); i++)
      if (m->calls[i].from == 0)
        break;
    /* write only the used entries, sized from the array element itself */
    write(f, m->calls, i*sizeof(m->calls[0]));
  }
  close(f);
}
extern unsigned start __asm__ ("start");
#define START (unsigned)&start
extern int etext;
/* SIGPROF handler (installed by _mcount_init): take one PC sample.
**
** Reads the interrupted EIP from __djgpp_exception_state and, if it falls
** inside the profiled range, bumps the matching histogram counter.  Ticks
** are ignored while mcount_skip is non-zero.  The signal number `_x' is
** unused.
*/
/* ARGSUSED */
static void
mcount_tick(int _x)
{
  unsigned eip;
  unsigned bin;

  if (mcount_skip)
    return;

  eip = __djgpp_exception_state->__eip;
  if (eip < START)
    return;

  bin = (eip - START) / 4; /* 4 EIP's per bin */

  /* The histogram holds (etext - START) / 4 counters (see the size
  ** computed in _mcount_init), so valid indices stop one short of that.
  ** Comparing the index, not the raw EIP, also covers a text size that
  ** is not a multiple of 4. */
  if (bin < ((unsigned)&etext - START) / 4)
    histogram[bin]++;
}
/* this is called to initialize profiling before the program starts
**
** Fills in the gmon header for the range START..etext, allocates and
** clears the PC histogram (one short per 4 bytes of code), registers
** mcount_write to run at exit, installs mcount_tick as the SIGPROF
** handler and starts the profiling timer.  Sampling is enabled last.
**
** If the histogram cannot be allocated, nothing is installed: no exit
** hook, no signal handler, no timer.
*/
void _mcount_init(void);
void
_mcount_init(void)
{
  struct itimerval new_values;

  h.low = START;
  h.high = (int)&etext;
  histlen = (h.high-h.low)/4*sizeof(short);
  h.nbytes = sizeof(header) + histlen;

  histogram = (short *)sbrk(histlen);
  if (histogram == (short *)-1)
  {
    /* out of memory - leave profiling switched off rather than
    ** clearing memory through the failure value */
    histogram = 0;
    histlen = 0;
    return;
  }
  memset(histogram, 0, histlen);

  atexit(mcount_write);

  /* here, do whatever it takes to initialize the timer interrupt */
  signal(SIGPROF, mcount_tick);

  /* 18.2 tics per second */
  /* NOTE(review): 5494 usec is roughly 182 Hz, not 18.2 Hz; presumably
  ** setitimer rounds the interval up to the timer tick - confirm */
  new_values.it_value.tv_usec = new_values.it_interval.tv_usec = 5494;
  new_values.it_value.tv_sec = new_values.it_interval.tv_sec = 0;
  setitimer(ITIMER_PROF, &new_values, NULL);

  /* everything is in place - let mcount_tick start sampling */
  mcount_skip = 0;
}
- Raw text -