% This file is part of the Stanford GraphBase (c) Stanford University 1992
\def\title{WORD\_\thinspace COMPONENTS}
@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
\prerequisite{GB\_WORDS}
@* Components. \kern-.7pt
This simple demonstration program computes the connected
components of the GraphBase graph of five-letter words. It prints the
words in order of decreasing weight, showing the number of edges,
components, and isolated vertices present in the graph defined by the
first $n$ words for all~$n$.
@f Vertex int
@f Arc int
@f Graph int
@p
#include "gb_graph.h" /* the GraphBase data structures */
#include "gb_words.h" /* the |words| routine */
@#
main()
{@+Graph *g=words(0,0,0,0); /* the graph we love */
Vertex *v; /* the current vertex being added to the component structure */
Arc *a; /* the current arc of interest */
int n=0; /* the number of vertices in the component structure */
int isol=0; /* the number of isolated vertices in the component structure */
int comp=0; /* the current number of components */
int m=0; /* the current number of edges */
printf("Component analysis of %s\n",g->id);
for (v=g->vertices; vvertices+g->n; v++) {
n++, printf("%4d: %5d %s",n,v->weight,v->name);
@;
printf("; c=%d,i=%d,m=%d\n", comp, isol, m);
}
@;
}
@ The arcs from |v| to previous vertices all appear on the list |v->arcs|
after the arcs from |v| to future vertices. In this program, we aren't
interested in the future, only the past; so we skip the initial arcs.
@=
@;
a=v->arcs;
while (a && a->tip>v) a=a->next;
if (!a) printf("[1]"); /* indicate that this word is isolated */
else {@+int c=0; /* the number of merge steps performed because of |v| */
for (; a; a=a->next) {@+register Vertex *u=a->tip;
m++;
@;
}
printf(" in %s[%d]", v->master->name, v->master->size);
/* show final component */
}
@ We keep track of connected components by using circular lists, a
procedure that is known to take average time $O(n)$ on truly
random graphs [Knuth and Sch\"onhage, {\sl Theoretical Computer Science\/
\bf 6} (1978), 281--315].
Namely, if |v| is a vertex, all the vertices in its component will be
in the list
$$\hbox{|v|, \ |v->link|, \ |v->link->link|, \ \dots,}$$
eventually returning to |v| again. There is also a master vertex in
each component, |v->master|; if |v| is the master vertex, |v->size| will
be the number of vertices in its component.
@d link z.v /* link to next vertex in component (occupies utility field |z|) */
@d master y.v /* pointer to master vertex in component */
@d size x.i /* size of component, kept up to date for master vertices only */
@=
v->link=v;
v->master=v;
v->size=1;
isol++;
comp++;
@ When two components merge together, we change the identity of the master
vertex in the smaller component. The master vertex representing |v| itself
will change if |v| is adjacent to any prior vertex.
@=
u=u->master;
if (u!=v->master) {@+register Vertex *w=v->master, *t;
if (u->sizesize) {
if (c++>0) printf("%s %s[%d]", (c==2? " with": ","), u->name, u->size);
w->size += u->size;
if (u->size==1) isol--;
for (t=u->link; t!=u; t=t->link) t->master=w;
u->master=w;
} else {
if (c++>0) printf("%s %s[%d]", (c==2? " with": ","), w->name, w->size);
if (u->size==1) isol--;
u->size += w->size;
if (w->size==1) isol--;
for (t=w->link; t!=w; t=t->link) t->master=u;
w->master=u;
}
t=u->link;
u->link=w->link;
w->link=t;
comp--;
}
@ The |words| graph has one giant component and lots of isolated vertices.
We consider all other components unusual, so we print them out when the
other computation is done.
@=
printf("\nThe following non-isolated words didn't join the giant component:\n");
for (v=g->vertices; vvertices+g->n; v++)
if (v->master==v && v->size>1 && v->size <4000) {@+register Vertex *u;
int c=1; /* count of number printed on current line */
printf("%s", v->name);
for (u=v->link; u!=v; u=u->link) {
if (c++==12) putchar('\n'),c=1;
printf(" %s",u->name);
}
putchar('\n');
}
@* Index. We close with a list that shows where the identifiers of this
program are defined and used.