* regexp.c: generic and extensible Regular Expression engine
*
* Basically designed with the purpose of compiling regexps for
- * the variety of validation/shemas mechanisms now available in
+ * the variety of validation/schemas mechanisms now available in
* XML related specifications these include:
* - XML-1.0 DTD validation
* - XML Schemas structure part 1
int maxTrans;
int nbTrans;
xmlRegTrans *trans;
- /* knowing states ponting to us can speed things up */
+ /* knowing states pointing to us can speed things up */
int maxTransTo;
int nbTransTo;
int *transTo;
if (transitions == NULL) {
xmlFree(stateRemap);
xmlFree(stringRemap);
+ for (i = 0;i < nbatoms;i++)
+ xmlFree(stringMap[i]);
xmlFree(stringMap);
xmlFree(ret);
return(NULL);
targetno = stateRemap[trans->to];
/*
* if the same atom can generate transitions to 2 different
- * states then it means the automata is not determinist and
+ * states then it means the automata is not deterministic and
* the compact form can't be used !
*/
prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
/**
* xmlRegCopyAtom:
* @ctxt: the regexp parser context
- * @atom: the oiginal atom
+ * @atom: the original atom
*
* Allocate a new regexp range
*
int nullable = 0;
if (atom == NULL) {
- ERROR("genrate transition: atom == NULL");
+ ERROR("generate transition: atom == NULL");
return(-1);
}
if (atom->type == XML_REGEXP_SUBREG) {
xmlRegAtomPtr copy;
/*
* duplicate a transition based on atom to count next
- * occurences after 1. We cannot loop to atom->start
+ * occurrences after 1. We cannot loop to atom->start
* directly because we need an epsilon transition to
* newstate.
*/
} else {
/*
* either we need the atom at least once or there
- * is an atom->start0 allowing to easilly plug the
+ * is an atom->start0 allowing to easily plug the
* epsilon transition.
*/
counter = xmlRegGetCounter(ctxt);
* Build the completed transitions bypassing the epsilons
* Use a marking algorithm to avoid loops
* Mark sink states too.
- * Process from the latests states backward to the start when
+ * Process from the latest states backward to the start when
* there is long cascading epsilon chains this minimize the
* recursions and transition compares when adding the new ones
*/
} else {
/*
* comparing a block range with anything else is way
- * too costly, and maintining the table is like too much
+ * too costly, and maintaining the table is like too much
* memory too, so let's force the automata to save state
* here.
*/
}
switch (type1) {
case XML_REGEXP_ANYSPACE: /* \s */
- /* can't be a letter, number, mark, pontuation, symbol */
+ /* can't be a letter, number, mark, punctuation, symbol */
if ((type2 == XML_REGEXP_NOTSPACE) ||
((type2 >= XML_REGEXP_LETTER) &&
(type2 <= XML_REGEXP_LETTER_OTHERS)) ||
case XML_REGEXP_NOTSPACE: /* \S */
break;
case XML_REGEXP_INITNAME: /* \l */
- /* can't be a number, mark, separator, pontuation, symbol or other */
+ /* can't be a number, mark, separator, punctuation, symbol or other */
if ((type2 == XML_REGEXP_NOTINITNAME) ||
((type2 >= XML_REGEXP_NUMBER) &&
(type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
case XML_REGEXP_NOTINITNAME: /* \L */
break;
case XML_REGEXP_NAMECHAR: /* \c */
- /* can't be a mark, separator, pontuation, symbol or other */
+ /* can't be a mark, separator, punctuation, symbol or other */
if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
((type2 >= XML_REGEXP_MARK) &&
(type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
case XML_REGEXP_NOTNAMECHAR: /* \C */
break;
case XML_REGEXP_DECIMAL: /* \d */
- /* can't be a letter, mark, separator, pontuation, symbol or other */
+ /* can't be a letter, mark, separator, punctuation, symbol or other */
if ((type2 == XML_REGEXP_NOTDECIMAL) ||
(type2 == XML_REGEXP_REALCHAR) ||
((type2 >= XML_REGEXP_LETTER) &&
case XML_REGEXP_NOTDECIMAL: /* \D */
break;
case XML_REGEXP_REALCHAR: /* \w */
- /* can't be a mark, separator, pontuation, symbol or other */
+ /* can't be a mark, separator, punctuation, symbol or other */
if ((type2 == XML_REGEXP_NOTDECIMAL) ||
((type2 >= XML_REGEXP_MARK) &&
(type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
case XML_REGEXP_STRING:
if (!deep)
ret = (atom1->valuep != atom2->valuep);
- else
- ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
- (xmlChar *)atom2->valuep);
+ else {
+ xmlChar *val1 = (xmlChar *)atom1->valuep;
+ xmlChar *val2 = (xmlChar *)atom2->valuep;
+ int compound1 = (xmlStrchr(val1, '|') != NULL);
+ int compound2 = (xmlStrchr(val2, '|') != NULL);
+
+ /* Ignore negative match flag for ##other namespaces */
+ if (compound1 != compound2)
+ return(0);
+
+ ret = xmlRegStrEqualWildcard(val1, val2);
+ }
break;
case XML_REGEXP_EPSILON:
goto not_determinist;
/**
* xmlRegFreeExecCtxt:
- * @exec: a regular expression evaulation context
+ * @exec: a regular expression evaluation context
*
- * Free the structures associated to a regular expression evaulation context.
+ * Free the structures associated to a regular expression evaluation context.
*/
void
xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
* @valStr: the validation string
*
* Checks if both strings are equal or have the same content. "*"
- * can be used as a wildcard in @valStr; "|" is used as a seperator of
+ * can be used as a wildcard in @valStr; "|" is used as a separator of
* substrings in both @expStr and @valStr.
*
* Returns 1 if the comparison is satisfied and the number of substrings
previous = ctxt->state;
ret = xmlFAParsePiece(ctxt);
- if (ret != 0) {
+ if (ret == 0) {
+ /* Empty branch */
+ xmlFAGenerateEpsilonTransition(ctxt, previous, to);
+ } else {
if (xmlFAGenerateTransitions(ctxt, previous,
- (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
+ (CUR=='|' || CUR==')' || CUR==0) ? to : NULL, ctxt->atom) < 0)
return(-1);
previous = ctxt->state;
ctxt->atom = NULL;
ret = xmlFAParsePiece(ctxt);
if (ret != 0) {
if (xmlFAGenerateTransitions(ctxt, previous,
- (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
+ (CUR=='|' || CUR==')' || CUR==0) ? to : NULL,
+ ctxt->atom) < 0)
return(-1);
previous = ctxt->state;
ctxt->atom = NULL;
end = ctxt->state;
while ((CUR == '|') && (ctxt->error == 0)) {
NEXT;
- if (CUR == 0) {
- ERROR("expecting a branch after |")
- return;
- }
ctxt->state = start;
ctxt->end = NULL;
xmlFAParseBranch(ctxt, end);
return(comp->determinist);
am = xmlNewAutomata();
+ if (am == NULL)
+ return(-1);
if (am->states != NULL) {
int i;
* @to: the target point of the transition or NULL
* @token: the input string associated to that transition
* @token2: the second input string associated to that transition
- * @min: the minimum successive occurences of token
- * @max: the maximum successive occurences of token
+ * @min: the minimum successive occurrences of token
+ * @max: the maximum successive occurrences of token
* @data: data associated to the transition
*
* If @to is NULL, this creates first a new target state in the automata
* @from: the starting point of the transition
* @to: the target point of the transition or NULL
* @token: the input string associated to that transition
- * @min: the minimum successive occurences of token
- * @max: the maximum successive occurences of token
+ * @min: the minimum successive occurrences of token
+ * @max: the maximum successive occurrences of token
* @data: data associated to the transition
*
* If @to is NULL, this creates first a new target state in the automata
* @to: the target point of the transition or NULL
* @token: the input string associated to that transition
* @token2: the second input string associated to that transition
- * @min: the minimum successive occurences of token
- * @max: the maximum successive occurences of token
+ * @min: the minimum successive occurrences of token
+ * @max: the maximum successive occurrences of token
* @data: data associated to the transition
*
* If @to is NULL, this creates first a new target state in the automata
* @from: the starting point of the transition
* @to: the target point of the transition or NULL
* @token: the input string associated to that transition
- * @min: the minimum successive occurences of token
- * @max: the maximum successive occurences of token
+ * @min: the minimum successive occurrences of token
+ * @max: the maximum successive occurrences of token
* @data: data associated to the transition
*
* If @to is NULL, this creates first a new target state in the automata
/* OR reduction rule 1 */
/* a | a reduced to a */
if (left == right) {
- left->ref--;
+ xmlExpFree(ctxt, right);
return(left);
}
/* OR canonicalization rule 1 */
* xmlExpIsNillable:
* @exp: the expression
*
- * Finds if the expression is nillable, i.e. if it accepts the empty sequqnce
+ * Finds if the expression is nillable, i.e. if it accepts the empty sequence
*
* Returns 1 if nillable, 0 if not and -1 in case of error
*/
* so that sub{n} subsume exp
*
* Returns the multiple value if successful, 0 if it is not a multiple
- * and -1 in case of internel error.
+ * and -1 in case of internal error.
*/
static int
return(forbiddenExp);
}
#ifdef DEBUG_DERIV
- printf("Compex exp vs Atom -> Forbid\n");
+ printf("Complex exp vs Atom -> Forbid\n");
#endif
return(forbiddenExp);
case XML_EXP_SEQ:
*
* Evaluates the expression resulting from @exp consuming a sub expression @sub
* Based on algebraic derivation and sometimes direct Brzozowski derivation
- * it usually tatkes less than linear time and can handle expressions generating
+ * it usually takes less than linear time and can handle expressions generating
* infinite languages.
*
* Returns the resulting expression or NULL in case of internal error, the
}
if (xmlExpCheckCard(exp, sub) == 0) {
#ifdef DEBUG_DERIV
- printf("sub generate longuer sequances than exp : can't subsume\n");
+ printf("sub generate longer sequences than exp : can't subsume\n");
#endif
return(forbiddenExp);
}
* @exp: the englobing expression
* @sub: the subexpression
*
- * Check whether @exp accepts all the languages accexpted by @sub
+ * Check whether @exp accepts all the languages accepted by @sub
* the input being a subexpression.
*
* Returns 1 if true 0 if false and -1 in case of failure.
}
if (xmlExpCheckCard(exp, sub) == 0) {
#ifdef DEBUG_DERIV
- printf("sub generate longuer sequances than exp : can't subsume\n");
+ printf("sub generate longer sequences than exp : can't subsume\n");
#endif
return(0);
}