Christian Wiese
17 years ago
3 changed files with 211 additions and 0 deletions
@ -0,0 +1,43 @@
|
||||
Sun Jan 28 21:08:45 EET 2007 Ville Laurikari <vl@iki.fi>
|
||||
* Fixed regoff_t documentation for wide characters.
|
||||
The documentation erroneously claimed that offsets are always given in
|
||||
bytes (they are bytes in byte and multibyte strings, but wchar_t
|
||||
offsets in wchar_t strings).
|
||||
|
||||
Thanks to Gregory Sharp for pointing this out.
|
||||
diff -rN -u old-stable/doc/tre-api.html new-stable/doc/tre-api.html
|
||||
--- old-stable/doc/tre-api.html 2007-11-04 20:25:00.000000000 +0200
|
||||
+++ new-stable/doc/tre-api.html 2007-11-04 20:25:00.000000000 +0200
|
||||
@@ -323,22 +323,21 @@
|
||||
<dl>
|
||||
<dt><tt><font class="type">regoff_t</font> <font
|
||||
class="arg">rm_so</font></tt></dt>
|
||||
-<dd>Byte offset from start of <tt><font class="arg">string</font></tt>
|
||||
-to start of substring. </dd>
|
||||
+<dd>Offset from start of <tt><font class="arg">string</font></tt> to start of
|
||||
+substring. </dd>
|
||||
<dt><tt><font class="type">regoff_t</font> <font
|
||||
class="arg">rm_eo</font></tt></dt>
|
||||
-<dd>Byte offset from start of <tt><font class="arg">string</font></tt>
|
||||
-to the first character after the substring. </dd>
|
||||
+<dd>Offset from start of <tt><font class="arg">string</font></tt> to the first
|
||||
+character after the substring. </dd>
|
||||
</dl>
|
||||
</blockquote>
|
||||
|
||||
<p>
|
||||
-The length of a submatch in bytes can be computed by subtracting
|
||||
-<code>rm_eo</code> and <code>rm_so</code>.
|
||||
-If a parenthesized subexpression did not participate in a match, the
|
||||
-<code>rm_so</code> and <code>rm_eo</code> fields for the corresponding
|
||||
-<code>pmatch</code> element are set to <code>-1</code>.
|
||||
-When a multibyte character set is in effect, the submatch offsets are
|
||||
+The length of a submatch can be computed by subtracting <code>rm_so</code> from
|
||||
+<code>rm_eo</code>. If a parenthesized subexpression did not participate in a
|
||||
+match, the <code>rm_so</code> and <code>rm_eo</code> fields for the
|
||||
+corresponding <code>pmatch</code> element are set to <code>-1</code>. Note
|
||||
+that when a multibyte character set is in effect, the submatch offsets are
|
||||
given as byte offsets, not character offsets.
|
||||
</p>
|
||||
|
||||
|
@ -0,0 +1,31 @@
|
||||
Sun Nov 4 18:47:56 EET 2007 Ville Laurikari <vl@iki.fi>
|
||||
* Fixed a bug in \<.
|
||||
\< always matched at the beginning of the string. Thanks to Shmuel
|
||||
Zeigerman for the bug report.
|
||||
|
||||
See http://laurikari.net/pipermail/tre-general/2007-February/000128.html
|
||||
diff -rN -u old-stable/lib/tre-match-utils.h new-stable/lib/tre-match-utils.h
|
||||
--- old-stable/lib/tre-match-utils.h 2007-11-04 20:30:23.000000000 +0200
|
||||
+++ new-stable/lib/tre-match-utils.h 2007-11-04 20:30:23.000000000 +0200
|
||||
@@ -161,7 +161,7 @@
|
||||
&& (next_c != L'\0' || reg_noteol) \
|
||||
&& (next_c != L'\n' || !reg_newline)) \
|
||||
|| ((assertions & ASSERT_AT_BOW) \
|
||||
- && (pos > 0 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))) \
|
||||
+ && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
|
||||
|| ((assertions & ASSERT_AT_EOW) \
|
||||
&& (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
|
||||
|| ((assertions & ASSERT_AT_WB) \
|
||||
diff -rN -u old-stable/tests/retest.c new-stable/tests/retest.c
|
||||
--- old-stable/tests/retest.c 2007-11-04 20:30:23.000000000 +0200
|
||||
+++ new-stable/tests/retest.c 2007-11-04 20:30:23.000000000 +0200
|
||||
@@ -1121,6 +1121,8 @@
|
||||
test_exec("aax xxa", 0, REG_OK, 2, 3, END);
|
||||
test_comp("\\Bx\\b", REG_EXTENDED, 0);
|
||||
test_exec("aax xxx", 0, REG_OK, 2, 3, END);
|
||||
+ test_comp("\\<.", REG_EXTENDED, 0);
|
||||
+ test_exec(";xaa", 0, REG_OK, 1, 2, END);
|
||||
|
||||
/* Shorthands for character classes. */
|
||||
test_comp("\\w+", REG_EXTENDED, 0);
|
||||
|
@ -0,0 +1,137 @@
|
||||
Fri Mar 16 19:18:02 EET 2007 Ville Laurikari <vl@iki.fi>
|
||||
* Refactoring.
|
||||
diff -rN -u old-stable/lib/tre-compile.c new-stable/lib/tre-compile.c
|
||||
--- old-stable/lib/tre-compile.c 2007-11-04 20:27:45.000000000 +0200
|
||||
+++ new-stable/lib/tre-compile.c 2007-11-04 20:27:45.000000000 +0200
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
tre-compile.c - TRE regex compiler
|
||||
|
||||
- Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>
|
||||
+ Copyright (c) 2001-2007 Ville Laurikari <vl@iki.fi>
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
@@ -127,6 +127,30 @@
|
||||
int next_tag;
|
||||
} tre_tag_states_t;
|
||||
|
||||
+
|
||||
+/* Record `tag' as the start/end offset tag of every submatch listed in the
|
||||
+ negative-terminated `regset' (even entry = start, odd = end), then empty it. */
|
||||
+static void
|
||||
+tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; regset[i] >= 0; i++) /* a negative entry terminates the set */
|
||||
+ {
|
||||
+ int id = regset[i] / 2; /* index into tnfa->submatch_data */
|
||||
+ int start = !(regset[i] % 2); /* even entry: start offset, odd: end offset */
|
||||
+ DPRINT((" Using tag %d for %s offset of "
|
||||
+ "submatch %d\n", tag,
|
||||
+ start ? "start" : "end", id));
|
||||
+ if (start)
|
||||
+ tnfa->submatch_data[id].so_tag = tag;
|
||||
+ else
|
||||
+ tnfa->submatch_data[id].eo_tag = tag;
|
||||
+ }
|
||||
+ regset[0] = -1; /* leave the set empty: its first entry is now the terminator */
|
||||
+}
|
||||
+
|
||||
+
|
||||
/* Adds tags to appropriate locations in the parse tree in `tree', so that
|
||||
subexpressions marked for submatch addressing can be traced. */
|
||||
static reg_errcode_t
|
||||
@@ -281,20 +305,7 @@
|
||||
minimal_tag = -1;
|
||||
num_minimals++;
|
||||
}
|
||||
- /* Go through the regset and set submatch data for
|
||||
- submatches that are using this tag. */
|
||||
- for (i = 0; regset[i] >= 0; i++)
|
||||
- {
|
||||
- int id = regset[i] / 2;
|
||||
- int start = !(regset[i] % 2);
|
||||
- DPRINT((" Using tag %d for %s offset of "
|
||||
- "submatch %d\n", tag,
|
||||
- start ? "start" : "end", id));
|
||||
- if (start)
|
||||
- tnfa->submatch_data[id].so_tag = tag;
|
||||
- else
|
||||
- tnfa->submatch_data[id].eo_tag = tag;
|
||||
- }
|
||||
+ tre_purge_regset(regset, tnfa, tag);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -394,20 +405,7 @@
|
||||
minimal_tag = -1;
|
||||
num_minimals++;
|
||||
}
|
||||
- /* Go through the regset and set submatch data for
|
||||
- submatches that are using this tag. */
|
||||
- for (i = 0; regset[i] >= 0; i++)
|
||||
- {
|
||||
- int id = regset[i] / 2;
|
||||
- int start = !(regset[i] % 2);
|
||||
- DPRINT((" Using tag %d for %s offset of "
|
||||
- "submatch %d\n", tag,
|
||||
- start ? "start" : "end", id));
|
||||
- if (start)
|
||||
- tnfa->submatch_data[id].so_tag = tag;
|
||||
- else
|
||||
- tnfa->submatch_data[id].eo_tag = tag;
|
||||
- }
|
||||
+ tre_purge_regset(regset, tnfa, tag);
|
||||
}
|
||||
|
||||
DPRINT((" num_tags++\n"));
|
||||
@@ -479,20 +477,7 @@
|
||||
minimal_tag = -1;
|
||||
num_minimals++;
|
||||
}
|
||||
- /* Go through the regset and set submatch data for
|
||||
- submatches that are using this tag. */
|
||||
- for (i = 0; regset[i] >= 0; i++)
|
||||
- {
|
||||
- int id = regset[i] / 2;
|
||||
- int start = !(regset[i] % 2);
|
||||
- DPRINT((" Using tag %d for %s offset of "
|
||||
- "submatch %d\n", tag,
|
||||
- start ? "start" : "end", id));
|
||||
- if (start)
|
||||
- tnfa->submatch_data[id].so_tag = tag;
|
||||
- else
|
||||
- tnfa->submatch_data[id].eo_tag = tag;
|
||||
- }
|
||||
+ tre_purge_regset(regset, tnfa, tag);
|
||||
}
|
||||
|
||||
DPRINT((" num_tags++\n"));
|
||||
@@ -640,23 +625,7 @@
|
||||
} /* end while(tre_stack_num_objects(stack) > bottom) */
|
||||
|
||||
if (!first_pass)
|
||||
- {
|
||||
- int i;
|
||||
- /* Go through the regset and set submatch data for
|
||||
- submatches that are using this tag. */
|
||||
- for (i = 0; regset[i] >= 0; i++)
|
||||
- {
|
||||
- int id = regset[i] / 2;
|
||||
- int start = !(regset[i] % 2);
|
||||
- DPRINT((" Using tag %d for %s offset of "
|
||||
- "submatch %d\n", num_tags,
|
||||
- start ? "start" : "end", id));
|
||||
- if (start)
|
||||
- tnfa->submatch_data[id].so_tag = num_tags;
|
||||
- else
|
||||
- tnfa->submatch_data[id].eo_tag = num_tags;
|
||||
- }
|
||||
- }
|
||||
+ tre_purge_regset(regset, tnfa, tag);
|
||||
|
||||
if (!first_pass && minimal_tag >= 0)
|
||||
{
|
||||
|
Loading…
Reference in new issue