1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-12-25 11:37:56 +00:00
freebsd/usr.bin/sgmls/sgmls/context.c
John Fieber 44842f599e Upgrade from 1.1 to 1.1.91. Unknown to me, the latter version was
actually available at the time I brought in the former.
Lots of assorted bug fixes and much needed support for catalogs.
1996-06-04 19:09:50 +00:00

452 lines
18 KiB
C

#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
#include "context.h"
/* Shorthand used throughout the content-model routines in this file. */
#define GI (tags[ts].tetd->etdgi+1) /* GI of current element. */
#define NEWGI (newetd->etdgi+1) /* GI of new tag. */
#define STATUS (*statuspt) /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
#define PEX (-1) /* GI is a plus exception and not a minus. */
/* ANYHIT: Nonzero if any hit bit is set in h.  The single-word case is
           tested inline; otherwise anyhit() scans all grplongs words.
*/
#define ANYHIT(h) (grplongs == 1 ? ((h)[0] != 0) : anyhit(h))
/* Hit bits are numbered from 1.  Bit n lives in word (n-1)>>LONGPOW at bit
   position (n-1)&(LONGBITS-1).  The parameters are parenthesized so that
   any expression may be passed for h or n, and the mask is built from an
   unsigned long so that selecting the top bit of a word never shifts a
   signed 1 into the sign bit.
*/
#define HITMASK(n) ((unsigned long)1 << (((n)-1) & (LONGBITS-1)))
/* HITSET: Turn on hit bit n in h. */
#define HITSET(h, n) ((h)[(unsigned)((n)-1) >> LONGPOW] \
                      |= HITMASK(n))
/* HITON: Nonzero if hit bit n in h is on. */
#define HITON(h, n) ((h)[(unsigned)((n)-1) >> LONGPOW] & HITMASK(n))
/* HITOFF: 1 if hit bit n in h is off; otherwise 0. */
#define HITOFF(h, n) (!(HITON(h, n)))
/* TOKENHIT: Nonzero if the current token's hit bit is on. */
#define TOKENHIT HITON(H,T)
/* COPYPOS: Copy a model position from one mpos array to another.
            Entries 0 through from[0].t are copied: the g and t members
            by assignment and the grplongs words of hit bits by memcpy.
            (from[0].t is presumably the index of the deepest level in
            use; confirm against the definitions in context.h.)
*/
static
VOID copypos(to, from)
struct mpos *to, *from;
{
     int level = 0;

     while (level <= (int)from[0].t) {
          struct mpos *dst = &to[level];
          struct mpos *src = &from[level];

          dst->g = src->g;
          dst->t = src->t;
          memcpy(dst->h, src->h, grplongs*sizeof(unsigned long));
          ++level;
     }
}
/* CONTEXT: Determine whether a GI is valid in the present structural context.
On entry, pos points to the model token to be tested against the GI and
STATUS holds the status of that token.  The model is walked token by
token (via newtoken) until the GI is matched or no further token can
be tried.
Return values, as produced by the code below:
RCHIT     the GI matched a model token and mexts <= 0;
RCREQ     a different element is required; its ETD is left in nextetd;
RCEND     the element has ended;
RCMISS    the GI is totally invalid here;
RCMEX     mexts > 0 and either the token tested was not required, or
the GI matched a token that is in an OR group or that has the
TOPT occurrence bit (pos is restored in the latter case);
RCHITMEX  mexts > 0 and the GI matched any other token;
RCPEX     mexts == -1 and the GI was not matched (pos is restored
if it had been saved).
When mexts is nonzero the entry position is saved in savedpos so that
it can be restored as described above.
TO DO: Save allowed GIs for an error message on an RCMISS.
Support a "query" mode (what is allowed now?) by working
with a copy of pos.
*/
int context(gi, mod, pos, statuspt, mexts)
struct etd *gi; /* ETD of new GI. */
struct thdr mod[]; /* Model of current open element. */
struct mpos pos[]; /* Position in open element's model. */
UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
int mexts; /* >0=stack level of minus grp; -1=plus; 0=none.*/
{
UNCH toccsv, gtypesv; /* Save token's TOCC and GTYPE in case grp ends.*/
/* Exception test: with mexts == -1 and the status already RCEND, report
RCPEX at once.  Otherwise remember the position for a later restore. */
if (mexts != 0) {
if (mexts == -1 && STATUS == RCEND)
return RCPEX;
copypos(savedpos, pos);
}
Tstart = T; /* Save starting token for AND group testing. */
while (STATUS!=RCMISS && STATUS!=RCEND) {
TRACEGI("CONTEXT", gi, mod, pos);
/* While the current token is a group header, open the group: push a
new level whose group index is the header's model index, step M
past the header, start at member 1, and reset the level's hit bits
(HITCLEAR is defined elsewhere; presumably it zeroes them). */
while (TTYPE==TTOR || TTYPE==TTSEQ || TTYPE==TTAND) {
pos[P+1].g = M++; pos[++P].t = 1; HITCLEAR(H);
Tstart = T; /* Save starting token for AND group testing. */
TRACEGI("OPENGRP", gi, mod, pos);
}
/* Classify the current token as required (RCREQ) or not (RCNREQ). */
STATUS = (UNCH)tokenreq(gi, mod, pos);
TRACEGI("STATUS", gi, mod, pos);
if (gi==TOKEN.tu.thetd) { /* Hit in model. */
STATUS = (UNCH)RCHIT;
/* Capture group type and occurrence now: newtoken may move the
position to a different token or end the group. */
gtypesv = GTYPE; toccsv = TOCC;
newtoken(mod, pos, statuspt);
if (mexts <= 0)
return RCHIT;
else if (gtypesv==TTOR || BITON(toccsv, TOPT)) {
/* Matched token was in an OR group or carried TOPT: undo the
advance and report RCMEX. */
copypos(pos, savedpos);
return RCMEX;
}
else
return RCHITMEX;
}
if (STATUS==RCREQ) {
/* For mexts == -1, leave the loop; the code after it restores
pos and returns RCPEX. */
if (mexts == -1)
break;
/* Otherwise mark the required token as hit, hand its ETD to the
caller through nextetd, and advance past it. */
STATUS = RCHIT;
nextetd = TOKEN.tu.thetd;
newtoken(mod, pos, statuspt);
return(RCREQ);
}
/* else if (STATUS==RCNREQ) */
/* Token was not required and not matched.  With mexts > 0 the result is
RCMEX (pos is not restored on this path); otherwise move on to the
next testable token and try again. */
if (mexts>0) return(RCMEX);
newtoken(mod, pos, statuspt);
}
/* No match.  With mexts == -1, restore the position and report RCPEX
(also stored in STATUS); otherwise return the final status. */
if (mexts == -1) {
copypos(pos, savedpos);
return STATUS = RCPEX;
}
return((int)STATUS);
}
/* ECONTEXT: Determine whether the current element can be ended, or whether
non-optional tokens remain at the current level or higher.
Returns 1 if element can be ended, or 0 if tokens remain.
On entry, STATUS==RCEND if there are no tokens left; if not,
pos points to the next model token to be tested.
Side effect: nextetd is set to the ETD of the current token when
that token is an element token (TTYPE==TTETD) at a level above
the first, and to 0 otherwise; it is also zeroed when the
remaining token has the TOPT occurrence bit.
TO DO: Support a "query" mode (what is required now?) by working
with a copy of pos.
*/
int econtext(mod, pos, statuspt)
struct thdr mod[]; /* Model of current open element. */
struct mpos pos[]; /* Position in open element's model. */
UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
{
unsigned next; /* Position in AND group of next testable token.*/
Tstart = T;
TRACEEND("ECONT", mod, pos, 0, 0);
/* At the first level the answer depends only on the current token:
the element can end if that token was hit or has TOPT. */
if (P<=1) {nextetd = 0; return(TOKENHIT || BITON(TOCC, TOPT));}
nextetd = TTYPE == TTETD ? TOKEN.tu.thetd : 0;
/* Every pass through this loop either returns or sets STATUS to RCNREQ,
so the loop is left only by a return once it has been entered. */
while (STATUS!=RCMISS && STATUS!=RCEND) {
/* End as many groups as possible, ignoring optional AND members. */
STATUS = (UNCH)testend(mod, pos, 0, 0);
TRACEEND("ECONTEND", mod, pos, 0, 0);
nextetd = P<=1 || TTYPE != TTETD ? 0 : TOKEN.tu.thetd;
if (STATUS==RCEND) return(1);
if (P<=1) return(TOKENHIT || BITON(TOCC, TOPT));
if (STATUS==RCMISS) {
if (BITON(TOCC, TOPT)) nextetd = 0;
return(0);
}
/* A token that is not contextually optional blocks the end-tag. */
if (!tokenopt(mod, pos)) return(0);
STATUS = RCNREQ;
/* Step to the next token: the next sequential member, or in an AND
group the next unhit member after T, wrapping to the first unhit
member if there is none. */
if (GTYPE!=TTAND) ++T; /* T!=GNUM or group would have ended. */
else T = (UNCH)(((next = (UNS)offbit(H, (int)T, GNUM))!=0) ?
next : offbit(H, 0, GNUM));
/* Model index of member T: the group header's index, plus the span of
the T-1 members before it, plus one. */
M = G + grpsz(&GHDR, (int)T-1) + 1;
TRACEEND("ECONTNEW", mod, pos, 0, 0);
}
/* Reached only when STATUS was already RCMISS or RCEND on entry
(with P > 1), since the loop above never falls out once entered. */
if (STATUS==RCMISS) {
if (BITON(TOCC, TOPT)) nextetd = 0;
return(0);
}
return(1); /* STATUS==RCEND */
}
/* NEWTOKEN: Find the next token to test. Set STATUS to indicate results:
RCEND if element has ended (no more tokens to test);
RCREQ if required new token was found;
RCNREQ if non-required new token was found;
RCHIT if a hit token was repeated (now non-required);
and RCMISS if a new token can't be found because current token
(which was not hit) was neither unconditionally required nor
optional.
On entry, STATUS==RCHIT means the current token matched the GI;
any other value means it did not.  The position in pos (and the
AND-group start marker Tstart) is updated in place.
*/
VOID newtoken(mod, pos, statuspt)
struct thdr mod[]; /* Model of current open element. */
struct mpos pos[]; /* Position in open element's model. */
UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
{
unsigned nextand = 0; /* Position in AND group of next testable token.*/
int currhit = (STATUS==RCHIT); /* 1=current GI hit; 0=not. */
/* If the GI was a hit, turn on the hit bit and set the status to
assume that the token to be tested against the next GI will
be non-required. If the current token is repeatable, exit so
it will stand as the next token to test.
*/
if (STATUS==RCHIT) {
HITSET(H, T);
STATUS = RCNREQ;
if (BITON(TOCC, TREP)) return;
}
/* At this point, we must determine the next token to test:
either against the next GI, if this one was a hit, or
against the same GI if conditions permit a retry.
To find the next token, we must first end the current group,
if possible, and any we can that contain it.
If the outermost group was a hit and is repeatable, or
if the element has ended, we exit now.
If it hasn't ended, or was optional and ended with a miss,
we can retry the GI against the next token.
*/
if ((STATUS = (UNCH)testend(mod, pos, 1, 1))!=RCNREQ) return;
/* At this point, the "current token" is either the original one,
or the token for the highest level unhit group that it ended.
We will retry a missed GI, by testing it against the next
token, if the current token:
1. Is optional;
2. Was hit (i.e., because it is repeatable and was hit by a
previous GI or because it is a hit group that just ended);
3. Is in an AND or OR group and is not the last testable token.
It will be the next sequential one (unhit one, in an AND group);
if there are none left, use the first unhit token in the group.
In either case, set M to correspond to the new T.
*/
retest:
TRACEEND("RETEST", mod, pos, (int)nextand, 1);
/* In an AND group, find the next unhit member after T, wrapping to the
first unhit member.
NOTE(review): nextand is assigned only here, so when retest is
re-entered after the level changed to a non-AND group it still holds
the value from the previous pass and is still tested below; confirm
that this is intended. */
if (GTYPE==TTAND) {
nextand = offbit(H, (int)T, GNUM);
if (!nextand)
nextand = offbit(H, 0, GNUM);
}
if ( BITON(TOCC, TOPT)
|| TOKENHIT
|| GTYPE==TTOR /* T!=GNUM or group would have ended. */
|| nextand ) {
if (GTYPE!=TTAND) ++T; /* T!=GNUM or group would have ended. */
else T = nextand;
/* Model index of member T: the group header's index, plus the span of
the T-1 members before it, plus one. */
M = G + grpsz(&GHDR, (int)T-1) + 1;
if (GTYPE==TTAND) {
/* If AND group wrapped, it can end if all non-optionals were
hit. */
if (T==Tstart && !currhit) {
UNCH Psave = P;
/* Try to end the group with optional AND members ignored.  If the
level changed, adopt testend's status and, when it is RCNREQ,
retry from the new position; if the level did not change the
group could not end, so the GI is a miss. */
int rc = testend(mod, pos, 0, 1);
if (Psave!=P) {if ((STATUS = (UNCH)rc)==RCNREQ) goto retest;}
else STATUS = RCMISS;
}
/* We only test unhit tokens, so we must use an unhit token
as Tstart (which is used to detect when the AND group has
wrapped). */
else if (HITON(H,Tstart)) Tstart = T;
}
}
else STATUS = RCMISS;
TRACEEND("NEWTOKEN", mod, pos, (int)nextand, 1);
}
/* TESTEND: End the current group, if possible, and any that it is nested in.
The current token will either be a group header, or some token
that could not end its group.  Each group that ends is popped
from pos (M is set to the group's header and P is decremented).
Returns one of:
RCNREQ  no (further) group could be ended;
RCHIT   called for a new token test (newtknsw) and the group
that just ended is repeatable (has TREP);
RCEND   the first level was reached and the element can end;
RCMISS  the first level was reached by ending unhit groups and
the remaining token is neither optional nor hit.
The loop below runs until rc is nonzero, so it relies on all of
these codes being nonzero.
*/
int testend(mod, pos, andoptsw, newtknsw)
struct thdr mod[]; /* Model of current open element. */
struct mpos pos[]; /* Position in open element's model. */
int andoptsw; /* 1=test optional AND members; 0=ignore. */
int newtknsw; /* 1=new token test; 0=end element test. */
{
int rc = 0; /* Return code: RCNREQ RCHIT RCMISS RCEND */
while (!rc) {
TRACEEND("TRACEEND", mod, pos, rc, andoptsw);
/* TESTMISS:
If we've hit no tokens yet in the current group, and
the current token is the last unhit one in the group we can test,
we will end the group (it may never really have started!)
because we might be able to try the token that follows it.
In any group, a token is the last testable unhit token if it
is the last sequential one, as the GI was already tested against
the preceding unhit tokens. In addition,
in a SEQ group, it is the last testable unhit token if it isn't
optional, because we can't skip past it to the following ones.
If we end the group, before popping the level, set M to G, as this
level's group header will be the next level's current token.
Unlike TESTHIT, the group's hit bit is not set in the outer level.
*/
if (!ANYHIT(H) && (T==GNUM
|| (GTYPE==TTSEQ && BITOFF(TOCC, TOPT)))) {
M = G; --P;
if (P<=1) {
if (BITON(TOCC, TOPT) || TOKENHIT) rc = RCEND;
else rc = RCMISS;
}
/* With rc still 0, loop to test the enclosing group. */
continue;
}
/* TESTHIT:
See if we've hit all the non-optional tokens in the group.
If so, pop to the previous level and set the group's hit bit.
If we were called from NEWTOKEN we are trying to find the token
to test against the next start-tag, so if the group is repeatable,
process it again. (If not, we were called from ECONTEXT and
are testing whether the element can be ended.)
Otherwise, if we are at the first level, the element is over.
*/
if ((GTYPE==TTOR && TOKENHIT)
|| (GTYPE==TTSEQ && T==(UNCH)GNUM
&& (TOKENHIT || BITON(TOCC, TOPT)))
|| (GTYPE==TTAND && allhit(&GHDR, H, 0, andoptsw))) {
M = G;
--P;
HITSET(H, T);
Tstart = T;
if (newtknsw && BITON(TOCC, TREP)) rc = RCHIT;
else if (P<=1) rc = RCEND;
/* If we are looking for a new token to test against the next
start-tag, then we need to consider optional and members
in this group, even if we didn't need to consider them
in the group that we just ended because that group had
wrapped. */
else if (newtknsw) andoptsw = 1;
/* Else loop to test new outer group. */
}
else rc = RCNREQ; /* No group ended this time, so return. */
}
TRACEEND("ENDFOUND", mod, pos, rc, andoptsw);
return(rc);
}
/* TOKENOPT: Report whether the current token is contextually optional.
             Returns 1 if the token has the TOPT occurrence bit, if its
             hit bit is already on, or if no token of its group has been
             hit and the group itself is contextually optional (as
             decided by GROUPOPT); otherwise returns 0.
*/
int tokenopt(mod, pos)
struct thdr mod[];            /* Model of current open element. */
struct mpos pos[];            /* Position in open element's model. */
{
     TRACEEND("TOKENOPT", mod, pos, 0, 0);

     /* Inherently optional. */
     if (BITON(TOCC, TOPT))
          return 1;

     /* Already hit (handles the "plus" suffix case). */
     if (TOKENHIT)
          return 1;

     /* A group that has started is no longer optional as a whole. */
     if (ANYHIT(H))
          return 0;

     /* No hits yet: optional exactly when the enclosing group is. */
     return groupopt(mod, pos) ? 1 : 0;
}
/* GROUPOPT: Decide whether the current group is contextually optional.
             At level 1 the answer is 1 if the group has the TOPT
             occurrence bit or the current token's hit bit is on, else 0.
             At deeper levels the group is temporarily made the current
             token (M is pointed at the group header and P is popped),
             TOKENOPT is applied to it, and the position is put back.
             Returns the value obtained from TOKENOPT.
*/
int groupopt(mod, pos)
struct thdr mod[];            /* Model of current open element. */
struct mpos pos[];            /* Position in open element's model. */
{
     UNCH heldM;              /* M as it was on entry. */
     int optional;            /* 1=contextually optional; 0=not. */

     if (P == 1) {
          if (BITON(GOCC, TOPT))
               return(1);
          return(TOKENHIT ? 1 : 0);
     }

     /* View the group header as the current token of the outer level. */
     heldM = M;
     M = G;
     --P;

     optional = tokenopt(mod, pos);

     /* Return to the inner level and put G and M back. */
     ++P;
     G = M;
     M = heldM;
     return(optional);
}
/* TOKENREQ: Returns RCREQ if the current token is "contextually required".
             That is, it is not contextually optional (see TOKENOPT) and
             it is a member of a "seq" group that either has at least
             one hit token or is itself contextually required (see
             GROUPREQ).
             Returns RCNREQ if the current token is "not required".
             Note: tokens in "and" and "or" groups are never reported as
             required here; an earlier rule for "and" groups in which all
             other tokens were hit had been compiled out of this function.
*/
int tokenreq(gi, mod, pos)
struct etd *gi;               /* ETD of new GI. */
struct thdr mod[];            /* Model of current open element. */
struct mpos pos[];            /* Position in open element's model. */
{
     TRACEGI("TOKENREQ", gi, mod, pos);

     /* A contextually optional token can never be required. */
     if (tokenopt(mod, pos))
          return(RCNREQ);

     /* Only members of a SEQ group can be required. */
     if (GTYPE != TTSEQ)
          return(RCNREQ);

     /* A SEQ group that has started must continue with this token. */
     if (ANYHIT(H))
          return(RCREQ);

     /* Otherwise the token is required only if its group is. */
     if (groupreq(gi, mod, pos) == RCREQ)
          return(RCREQ);
     return(RCNREQ);
}
/* GROUPREQ: Decide whether the current group is contextually required.
             At level 1 the group is required (RCREQ) unless it has the
             TOPT occurrence bit (RCNREQ).
             At deeper levels the group is temporarily made the current
             token (M is pointed at the group header and P is popped),
             TOKENREQ is applied to it, and the position is put back.
             Returns the value obtained from TOKENREQ.
*/
int groupreq(gi, mod, pos)
struct etd *gi;               /* ETD of new GI. */
struct thdr mod[];            /* Model of current open element. */
struct mpos pos[];            /* Position in open element's model. */
{
     UNCH heldM;              /* M as it was on entry. */
     int verdict;             /* Return code: RCREQ RCNREQ */

     if (P == 1) {
          if (BITOFF(GOCC, TOPT))
               return(RCREQ);
          return(RCNREQ);
     }

     /* View the group header as the current token of the outer level. */
     heldM = M;
     M = G;
     --P;

     verdict = tokenreq(gi, mod, pos);

     /* Return to the inner level and put G and M back. */
     ++P;
     G = M;
     M = heldM;
     return(verdict);
}
/* GRPSZ: Returns the number of model entries occupied by the first t
          members of a group.  g points to the group header; its members
          follow it in the model.  Each member counts as one entry, and a
          member that is itself a group (OR, SEQ or AND) additionally
          counts the entries of all of its own members, found by calling
          GRPSZ recursively.  The group header itself is not counted, so
          the result is 0 when t is 0 or negative.
*/
int grpsz(g, t)
struct thdr *g;               /* mod[G]: Ptr to group in the model. */
int t;                        /* T: Index of last token in the group. */
{
     struct thdr *cur = g;    /* Ptr to current token in the model. */
     int span = 0;            /* Entries counted so far. */
     int member;              /* Index of member being counted. */
     UNS kind;                /* Token type (without TOREP bits). */

     for (member = 1; member <= t; ++member) {
          ++cur;
          ++span;
          kind = GET(cur->ttype, TTMASK);
          if (kind == TTOR || kind == TTSEQ || kind == TTAND) {
               /* Skip over everything nested in this subgroup. */
               span += grpsz(cur, cur->tu.tnum);
               cur = g + span;
          }
     }
     return(span);
}
/* ALLHIT: Returns 1 if every member of the specified group that has to be
           hit has its hit bit on; otherwise returns 0.  A member does not
           have to be hit if its index equals "but" (pass 0 to exempt
           none), or if "opt" is 0 and the member has the TOPT occurrence
           bit.  GRPSZ is used to skip past the tokens nested in members
           that are themselves groups.
*/
int allhit(p, hits, but, opt)
struct thdr *p;               /* mod[G]: Ptr to group in the model. */
unsigned long *hits;          /* H: Hit bits to be tested. */
int but;                      /* Index of bit to ignore; 0=test all. */
int opt;                      /* 1=optional tokens must be hit; 0=ignore. */
{
     int total = p->tu.tnum;  /* Number of members (bits) to test. */
     int bit;                 /* Index of bit being tested in hits. */
     unsigned kind;           /* Token type (without TOREP bits). */

     /* p is stepped from the group header to each member in turn. */
     for (bit = 1, ++p; bit <= total; ++bit, ++p) {
          if (bit != but && HITOFF(hits, bit)
              && (opt || BITOFF(p->ttype, TOPT)))
               return 0;
          kind = GET(p->ttype, TTMASK);
          if (kind == TTOR || kind == TTSEQ || kind == TTAND)
               p += grpsz(p, p->tu.tnum);
     }
     return 1;
}
/* OFFBIT: Returns the index of the lowest unset bit whose index is greater
           than "first" and no greater than "last".  Bit indexes start at
           1, so passing 0 for "first" searches from the first bit.
           Returns 0 if every bit in that range is on.
*/
int offbit(bits, first, last)
unsigned long *bits;          /* Bits to be tested. */
int first;                    /* Search starts just after this index. */
int last;                     /* Index of last bit to be tested in bits. */
{
     int candidate;

     for (candidate = first + 1; candidate <= last; ++candidate) {
          if (HITOFF(bits, candidate))
               return candidate;
     }
     return 0;
}
/* ANYHIT: Return 1 if any bit is set in the first grplongs words of bits;
           otherwise return 0.
*/
int anyhit(bits)
unsigned long *bits;
{
     int word = 0;

     while (word < grplongs) {
          if (bits[word] != 0)
               return 1;
          ++word;
     }
     return 0;
}
/*
Local Variables:
c-indent-level: 5
c-continued-statement-offset: 5
c-brace-offset: -5
c-argdecl-indent: 0
c-label-offset: -5
comment-column: 30
End:
*/