mirror of
https://git.FreeBSD.org/src.git
synced 2024-12-25 11:37:56 +00:00
44842f599e
actually available at the time I brought in the former. Lots of assorted bug fixes and much needed support for catalogs.
452 lines
18 KiB
C
452 lines
18 KiB
C
#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
|
|
#include "context.h"
|
|
|
|
/* Shorthand used by the content model checker.
   LONGPOW, LONGBITS, grplongs, H and T are not defined in this file;
   presumably LONGBITS is the number of bits in an unsigned long and
   LONGPOW is its base-2 logarithm -- confirm against the headers.
*/
#define GI (tags[ts].tetd->etdgi+1)   /* GI of current element. */
#define NEWGI (newetd->etdgi+1)       /* GI of new tag. */
#define STATUS (*statuspt)  /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
#define PEX (-1)            /* GI is a plus exception and not a minus. */

/* ANYHIT: Nonzero if any hit bit is set in h.  A one-word hit set is
           tested inline; larger sets are handed to anyhit().
*/
#define ANYHIT(h) (grplongs == 1 ? ((h)[0] != 0) : anyhit(h))

/* Hit bits: token n (numbered from 1) is bit (n-1) of the unsigned long
   array h.  The mask is built from 1UL rather than 1L so that selecting
   the top bit of a word never shifts into the sign bit of a signed long
   (undefined behaviour).  Arguments are parenthesized so that an
   expression may be passed for h or n.
*/
/* HITSET: Turn on the hit bit for token n. */
#define HITSET(h, n) ((h)[(unsigned)((n)-1)>>LONGPOW] \
                      |= (1UL<<(((n)-1)&(LONGBITS-1))))
/* HITON: Nonzero if the hit bit for token n is on. */
#define HITON(h, n) ((h)[(unsigned)((n)-1)>>LONGPOW] \
                     & (1UL<<(((n)-1)&(LONGBITS-1))))
/* HITOFF: 1 if the hit bit for token n is off; 0 if it is on. */
#define HITOFF(h, n) (!(HITON(h, n)))

/* TOKENHIT: Nonzero if the current token (T) was hit in its group (H). */
#define TOKENHIT HITON(H,T)
|
|
|
|
static
|
|
VOID copypos(to, from)
|
|
struct mpos *to, *from;
|
|
{
|
|
int i;
|
|
for (i = 0; i <= (int)from[0].t; i++) {
|
|
to[i].g = from[i].g;
|
|
to[i].t = from[i].t;
|
|
memcpy(to[i].h, from[i].h, grplongs*sizeof(unsigned long));
|
|
}
|
|
}
|
|
|
|
/* CONTEXT: Determine whether a GI is valid in the present structural context.
|
|
Returns RCHIT if valid, RCEND if element has ended, RCREQ if a
|
|
different element is required, and RCMISS if it is totally invalid.
|
|
On entry, pos points to the model token to be tested against the GI.
|
|
TO DO: Save allowed GIs for an error message on an RCMISS.
|
|
Support a "query" mode (what is allowed now?) by working
|
|
with a copy of pos.
|
|
*/
|
|
int context(gi, mod, pos, statuspt, mexts)
|
|
struct etd *gi; /* ETD of new GI. */
|
|
struct thdr mod[]; /* Model of current open element. */
|
|
struct mpos pos[]; /* Position in open element's model. */
|
|
UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
|
|
int mexts; /* >0=stack level of minus grp; -1=plus; 0=none.*/
|
|
{
|
|
UNCH toccsv, gtypesv; /* Save token's TOCC and GTYPE in case grp ends.*/
|
|
|
|
if (mexts != 0) {
|
|
if (mexts == -1 && STATUS == RCEND)
|
|
return RCPEX;
|
|
copypos(savedpos, pos);
|
|
}
|
|
Tstart = T; /* Save starting token for AND group testing. */
|
|
while (STATUS!=RCMISS && STATUS!=RCEND) {
|
|
TRACEGI("CONTEXT", gi, mod, pos);
|
|
while (TTYPE==TTOR || TTYPE==TTSEQ || TTYPE==TTAND) {
|
|
pos[P+1].g = M++; pos[++P].t = 1; HITCLEAR(H);
|
|
Tstart = T; /* Save starting token for AND group testing. */
|
|
TRACEGI("OPENGRP", gi, mod, pos);
|
|
}
|
|
STATUS = (UNCH)tokenreq(gi, mod, pos);
|
|
TRACEGI("STATUS", gi, mod, pos);
|
|
if (gi==TOKEN.tu.thetd) { /* Hit in model. */
|
|
STATUS = (UNCH)RCHIT;
|
|
gtypesv = GTYPE; toccsv = TOCC;
|
|
newtoken(mod, pos, statuspt);
|
|
if (mexts <= 0)
|
|
return RCHIT;
|
|
else if (gtypesv==TTOR || BITON(toccsv, TOPT)) {
|
|
/* restore position */
|
|
copypos(pos, savedpos);
|
|
return RCMEX;
|
|
}
|
|
else
|
|
return RCHITMEX;
|
|
}
|
|
if (STATUS==RCREQ) {
|
|
if (mexts == -1)
|
|
break;
|
|
STATUS = RCHIT;
|
|
nextetd = TOKEN.tu.thetd;
|
|
newtoken(mod, pos, statuspt);
|
|
return(RCREQ);
|
|
}
|
|
/* else if (STATUS==RCNREQ) */
|
|
if (mexts>0) return(RCMEX);
|
|
newtoken(mod, pos, statuspt);
|
|
}
|
|
if (mexts == -1) {
|
|
copypos(pos, savedpos);
|
|
return STATUS = RCPEX;
|
|
}
|
|
return((int)STATUS);
|
|
}
|
|
/* ECONTEXT: Determine whether the current element can be ended, or whether
|
|
non-optional tokens remain at the current level or higher.
|
|
Returns 1 if element can be ended, or 0 if tokens remain.
|
|
On entry, STATUS==RCEND if there are no tokens left; if not,
|
|
pos points to the next model token to be tested.
|
|
TO DO: Support a "query" mode (what is required now?) by working
|
|
with a copy of pos.
|
|
*/
|
|
int econtext(mod, pos, statuspt)
|
|
struct thdr mod[]; /* Model of current open element. */
|
|
struct mpos pos[]; /* Position in open element's model. */
|
|
UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
|
|
{
|
|
unsigned next; /* Position in AND group of next testable token.*/
|
|
|
|
Tstart = T;
|
|
TRACEEND("ECONT", mod, pos, 0, 0);
|
|
if (P<=1) {nextetd = 0; return(TOKENHIT || BITON(TOCC, TOPT));}
|
|
nextetd = TTYPE == TTETD ? TOKEN.tu.thetd : 0;
|
|
while (STATUS!=RCMISS && STATUS!=RCEND) {
|
|
STATUS = (UNCH)testend(mod, pos, 0, 0);
|
|
TRACEEND("ECONTEND", mod, pos, 0, 0);
|
|
nextetd = P<=1 || TTYPE != TTETD ? 0 : TOKEN.tu.thetd;
|
|
if (STATUS==RCEND) return(1);
|
|
if (P<=1) return(TOKENHIT || BITON(TOCC, TOPT));
|
|
if (STATUS==RCMISS) {
|
|
if (BITON(TOCC, TOPT)) nextetd = 0;
|
|
return(0);
|
|
}
|
|
if (!tokenopt(mod, pos)) return(0);
|
|
|
|
STATUS = RCNREQ;
|
|
if (GTYPE!=TTAND) ++T; /* T!=GNUM or group would have ended. */
|
|
else T = (UNCH)(((next = (UNS)offbit(H, (int)T, GNUM))!=0) ?
|
|
next : offbit(H, 0, GNUM));
|
|
|
|
M = G + grpsz(&GHDR, (int)T-1) + 1;
|
|
TRACEEND("ECONTNEW", mod, pos, 0, 0);
|
|
}
|
|
if (STATUS==RCMISS) {
|
|
if (BITON(TOCC, TOPT)) nextetd = 0;
|
|
return(0);
|
|
}
|
|
return(1); /* STATUS==RCEND */
|
|
}
|
|
/* NEWTOKEN: Find the next token to test. Set STATUS to indicate results:
|
|
RCEND if element has ended (no more tokens to test);
|
|
RCREQ if required new token was found;
|
|
RCNREQ if non-required new token was found;
|
|
RCHIT if a hit token was repeated (now non-required);
|
|
and RCMISS if a new token can't be found because current token
|
|
(which was not hit) was neither unconditionally required nor
|
|
optional.
|
|
*/
|
|
VOID newtoken(mod, pos, statuspt)
|
|
struct thdr mod[]; /* Model of current open element. */
|
|
struct mpos pos[]; /* Position in open element's model. */
|
|
UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
|
|
{
|
|
unsigned nextand = 0; /* Position in AND group of next testable token.*/
|
|
int currhit = (STATUS==RCHIT); /* 1=current GI hit; 0=not. */
|
|
|
|
/* If the GI was a hit, turn on the hit bit and set the status to
|
|
assume that the token to be tested against the next GI will
|
|
be non-required. If the current token is repeatable, exit so
|
|
it will stand as the next token to test.
|
|
*/
|
|
if (STATUS==RCHIT) {
|
|
HITSET(H, T);
|
|
STATUS = RCNREQ;
|
|
if (BITON(TOCC, TREP)) return;
|
|
}
|
|
/* At this point, we must determine the next token to test:
|
|
either against the next GI, if this one was a hit, or
|
|
against the same GI if conditions permit a retry.
|
|
To find the next token, we must first end the current group,
|
|
if possible, and any we can that contain it.
|
|
If the outermost group was a hit and is repeatable, or
|
|
if the element has ended, we exit now.
|
|
If it hasn't ended, or was optional and ended with a miss,
|
|
we can retry the GI against the next token.
|
|
*/
|
|
if ((STATUS = (UNCH)testend(mod, pos, 1, 1))!=RCNREQ) return;
|
|
|
|
/* At this point, the "current token" is either the original one,
|
|
or the token for the highest level unhit group that it ended.
|
|
We will retry a missed GI, by testing it against the next
|
|
token, if the current token:
|
|
1. Is optional;
|
|
2. Was hit (i.e., because it is repeatable and was hit by a
|
|
previous GI or because it is a hit group that just ended);
|
|
3. Is in an AND or OR group and is not the last testable token.
|
|
|
|
It will be the next sequential one (unhit one, in an AND group);
|
|
if there are none left, use the first unhit token in the group.
|
|
In either case, set M to correspond to the new T.
|
|
*/
|
|
retest:
|
|
TRACEEND("RETEST", mod, pos, (int)nextand, 1);
|
|
if (GTYPE==TTAND) {
|
|
nextand = offbit(H, (int)T, GNUM);
|
|
if (!nextand)
|
|
nextand = offbit(H, 0, GNUM);
|
|
}
|
|
if ( BITON(TOCC, TOPT)
|
|
|| TOKENHIT
|
|
|| GTYPE==TTOR /* T!=GNUM or group would have ended. */
|
|
|| nextand ) {
|
|
if (GTYPE!=TTAND) ++T; /* T!=GNUM or group would have ended. */
|
|
else T = nextand;
|
|
M = G + grpsz(&GHDR, (int)T-1) + 1;
|
|
if (GTYPE==TTAND) {
|
|
/* If AND group wrapped, it can end if all non-optionals were
|
|
hit. */
|
|
if (T==Tstart && !currhit) {
|
|
UNCH Psave = P;
|
|
int rc = testend(mod, pos, 0, 1);
|
|
if (Psave!=P) {if ((STATUS = (UNCH)rc)==RCNREQ) goto retest;}
|
|
else STATUS = RCMISS;
|
|
}
|
|
|
|
/* We only test unhit tokens, so we must use an unhit token
|
|
as Tstart (which is used to detect when the AND group has
|
|
wrapped). */
|
|
else if (HITON(H,Tstart)) Tstart = T;
|
|
}
|
|
}
|
|
else STATUS = RCMISS;
|
|
TRACEEND("NEWTOKEN", mod, pos, (int)nextand, 1);
|
|
}
|
|
/* TESTEND: End the current group, if possible, and any that it is nested in.
|
|
The current token will either be a group header, or some token
|
|
that could not end its group. Return 1 if the (possibly new)
|
|
current token is repeatable; 0 if it is not.
|
|
*/
|
|
int testend(mod, pos, andoptsw, newtknsw)
|
|
struct thdr mod[]; /* Model of current open element. */
|
|
struct mpos pos[]; /* Position in open element's model. */
|
|
int andoptsw; /* 1=test optional AND members; 0=ignore. */
|
|
int newtknsw; /* 1=new token test; 0=end element test. */
|
|
{
|
|
int rc = 0; /* Return code: RCNREQ RCHIT RCMISS RCEND */
|
|
|
|
while (!rc) {
|
|
TRACEEND("TRACEEND", mod, pos, rc, andoptsw);
|
|
/* TESTMISS:
|
|
If we've hit no tokens yet in the current group, and
|
|
the current token is the last unhit one in the group we can test,
|
|
we will end the group (it may never really have started!)
|
|
because we might be able to try the token that follows it.
|
|
In any group, a token is the last testable unhit token if it
|
|
is the last sequential one, as the GI was already tested against
|
|
the preceding unhit tokens. In addition,
|
|
in a SEQ group, it is the last testable unhit token if it isn't
|
|
optional, because we can't skip past it to the following ones.
|
|
If we end the group, before popping the level, set M to G, as this
|
|
level`s group header will be the next level's current token.
|
|
*/
|
|
if (!ANYHIT(H) && (T==GNUM
|
|
|| (GTYPE==TTSEQ && BITOFF(TOCC, TOPT)))) {
|
|
M = G; --P;
|
|
if (P<=1) {
|
|
if (BITON(TOCC, TOPT) || TOKENHIT) rc = RCEND;
|
|
else rc = RCMISS;
|
|
}
|
|
continue;
|
|
}
|
|
/* TESTHIT:
|
|
See if we've hit all the non-optional tokens in the group.
|
|
If so, pop to the previous level and set the group's hit bit.
|
|
If we were called from NEWTOKEN we are trying to find the token
|
|
to test against the next start-tag, so if the group is repeatable,
|
|
process it again. (If not, we were called from ECONTEXT and
|
|
are testing whether the element can be ended.)
|
|
Otherwise, if we are at the first level, the element is over.
|
|
*/
|
|
if ((GTYPE==TTOR && TOKENHIT)
|
|
|| (GTYPE==TTSEQ && T==(UNCH)GNUM
|
|
&& (TOKENHIT || BITON(TOCC, TOPT)))
|
|
|| (GTYPE==TTAND && allhit(&GHDR, H, 0, andoptsw))) {
|
|
M = G;
|
|
--P;
|
|
HITSET(H, T);
|
|
Tstart = T;
|
|
if (newtknsw && BITON(TOCC, TREP)) rc = RCHIT;
|
|
else if (P<=1) rc = RCEND;
|
|
/* If we are looking for a new token to test against the next
|
|
start-tag, then we need to consider optional and members
|
|
in this group, even if we didn't need to consider them
|
|
in the group that we just ended because that group had
|
|
wrapped. */
|
|
else if (newtknsw) andoptsw = 1;
|
|
/* Else loop to test new outer group. */
|
|
}
|
|
else rc = RCNREQ; /* No group ended this time, so return. */
|
|
}
|
|
TRACEEND("ENDFOUND", mod, pos, rc, andoptsw);
|
|
return(rc);
|
|
}
|
|
/* TOKENOPT: Return 1 if current token is contextually optional;
|
|
otherwise, return 0.
|
|
*/
|
|
int tokenopt(mod, pos)
|
|
struct thdr mod[]; /* Model of current open element. */
|
|
struct mpos pos[]; /* Position in open element's model. */
|
|
{
|
|
TRACEEND("TOKENOPT", mod, pos, 0, 0);
|
|
return (BITON(TOCC, TOPT) /* Inherently optional. */
|
|
|| TOKENHIT /* Was hit (handles "plus" suffix case). */
|
|
|| (!ANYHIT(H) && groupopt(mod, pos)));
|
|
/* In optional group with no hits. */
|
|
}
|
|
/* GROUPOPT: Temporarily makes the current group be the current token so that
|
|
TOKENOPT() can be applied to it. Returns the value returned
|
|
by TOKENOPT.
|
|
*/
|
|
int groupopt(mod, pos)
|
|
struct thdr mod[]; /* Model of current open element. */
|
|
struct mpos pos[]; /* Position in open element's model. */
|
|
{
|
|
UNCH saveM; /* Save M when testing if group is not required.*/
|
|
int rc; /* 1=contextually optional; 0=not. */
|
|
|
|
if (P==1) return(BITON(GOCC, TOPT) || TOKENHIT);
|
|
saveM = M; M = G; --P;
|
|
rc = tokenopt(mod, pos);
|
|
++P; G = M; M = saveM;
|
|
return(rc);
|
|
}
|
|
/* TOKENREQ: Returns RCREQ if the current token is "contextually required".
|
|
That is, it is not contextually optional and
|
|
1) it is a member of a "seq" group that is either required
|
|
or has at least 1 hit token.
|
|
2) it is a member of an "and" group in which all other
|
|
tokens were hit.
|
|
Optional tokens are not counted
|
|
if GI is ETDCDATA, as we are looking for an
|
|
omitted start-tag. Otherwise, they are counted,
|
|
as the GI might match one of them.
|
|
Returns RCNREQ if the current token is "not required".
|
|
*/
|
|
int tokenreq(gi, mod, pos)
|
|
struct etd *gi; /* ETD of new GI. */
|
|
struct thdr mod[]; /* Model of current open element. */
|
|
struct mpos pos[]; /* Position in open element's model. */
|
|
{
|
|
TRACEGI("TOKENREQ", gi, mod, pos);
|
|
return( tokenopt(mod, pos) ? RCNREQ
|
|
: ( GTYPE==TTSEQ && (ANYHIT(H) || groupreq(gi, mod, pos)==RCREQ)
|
|
#if 0
|
|
|| (GTYPE==TTAND && allhit(&GHDR, H, T, \*gi!=ETDCDATA*\ 1))
|
|
#endif
|
|
)
|
|
? RCREQ : RCNREQ );
|
|
}
|
|
/* GROUPREQ: Temporarily makes the current group be the current token so that
|
|
TOKENREQ() can be applied to it. Returns the value returned
|
|
by TOKENREQ.
|
|
*/
|
|
int groupreq(gi, mod, pos)
|
|
struct etd *gi; /* ETD of new GI. */
|
|
struct thdr mod[]; /* Model of current open element. */
|
|
struct mpos pos[]; /* Position in open element's model. */
|
|
{
|
|
UNCH saveM; /* Save M when testing if group is not required.*/
|
|
int rc; /* Return code: RCREQ RCNREQ */
|
|
|
|
if (P==1) return(BITOFF(GOCC, TOPT) ? RCREQ : RCNREQ);
|
|
saveM = M; M = G; --P;
|
|
rc = tokenreq(gi, mod, pos);
|
|
++P; G = M; M = saveM;
|
|
return(rc);
|
|
}
|
|
/* GRPSZ: Returns the number of tokens spanned by a group in the model (M),
|
|
from the group's start (G) to a specified index within the group (T).
|
|
M = 0, plus 1 for each token in the group, plus the size of
|
|
any subgroups (gotten by calling GRPSZ recursively). On entry,
|
|
M must be equal to G at the current level.
|
|
*/
|
|
int grpsz(g, t)
|
|
struct thdr *g; /* mod[G]: Ptr to group in the model. */
|
|
int t; /* T: Index of last token in the group. */
|
|
{
|
|
struct thdr *p = g; /* Ptr to current token in the model. */
|
|
int m = 0; /* Size of group (including nested groups). */
|
|
int i = 0; /* Number of group members (loop counter). */
|
|
UNS type; /* Token type (without TOREP bits). */
|
|
|
|
while (++i<=t) {
|
|
++p; ++m;
|
|
type = GET(p->ttype, TTMASK);
|
|
if (type==TTOR || type==TTSEQ || type==TTAND) {
|
|
m += grpsz(p, p->tu.tnum);
|
|
p = g+m;
|
|
}
|
|
}
|
|
return(m);
|
|
}
|
|
/* ALLHIT: Returns 1 if all hit bits for the specified group are turned on,
|
|
(other than those that correspond to optional tokens if "opt" is
|
|
0) and the "but" bit (all bits if "but" bit is zero). Otherwise,
|
|
returns 0. GRPSZ is used to skip past subgroup tokens.
|
|
*/
|
|
int allhit(p, hits, but, opt)
|
|
struct thdr *p; /* mod[G]: Ptr to group in the model. */
|
|
unsigned long *hits; /* H: Hit bits to be tested. */
|
|
int but; /* Index of bit to ignore; 0=test all. */
|
|
int opt; /* 1=optional tokens must be hit; 0=ignore. */
|
|
{
|
|
int b = 0; /* Index of bit being tested in hits. */
|
|
int e = p->tu.tnum; /* Ending index (number of bits to test). */
|
|
unsigned type; /* Token type (without TOREP bits). */
|
|
|
|
while (++p, ++b<=e) {
|
|
if (HITOFF(hits,b) && (opt || BITOFF(p->ttype,TOPT)) && b!=but)
|
|
return 0;
|
|
if ((type = GET(p->ttype,TTMASK))==TTOR || type==TTSEQ || type==TTAND)
|
|
p += grpsz(p, p->tu.tnum);
|
|
}
|
|
return 1;
|
|
}
|
|
/* OFFBIT: Returns the index of the first unset bit after (i.e., not including)
           the caller's "first" bit, looking no further than "last".
           If all bits from first+1 through last are on, it returns 0.
*/
int offbit(bits, first, last)
unsigned long *bits;          /* Bits to be tested. */
int first;                    /* Index of bit just before the first tested. */
int last;                     /* Index of last bit to be tested in bits. */
{
     int bit;                 /* Index of bit being examined. */

     for (bit = first + 1; bit <= last; bit++) {
          if (HITOFF(bits, bit))
               return bit;
     }
     return 0;
}
|
|
|
|
/* ANYHIT: Return 1 if any bit is set. */
|
|
|
|
int anyhit(bits)
|
|
unsigned long *bits;
|
|
{
|
|
int i;
|
|
for (i = 0; i < grplongs; i++)
|
|
if (bits[i] != 0)
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
Local Variables:
|
|
c-indent-level: 5
|
|
c-continued-statement-offset: 5
|
|
c-brace-offset: -5
|
|
c-argdecl-indent: 0
|
|
c-label-offset: -5
|
|
comment-column: 30
|
|
End:
|
|
*/
|