Applied pending tre bugfix patches now available in the development repo

18 years ago · b2f11c5b77
3 changed files with 211 additions and 0 deletions
--- a/textproc/tre/tre-0.7.5-api-doc.patch
+++ b/textproc/tre/tre-0.7.5-api-doc.patch
@ -0,0 +1,43 @@
+Sun Jan 28 21:08:45 EET 2007  Ville Laurikari <[email protected]>
+  * Fixed regoff_t documentation for wide characters.
+  The documentation erroneously claimed that offsets are always given in
+  bytes (they are bytes in byte and multibyte strings, but wchar_t
+  offsets in wchar_t strings).
+  
+  Thanks to Gregory Sharp for pointing this out.
+diff -rN -u old-stable/doc/tre-api.html new-stable/doc/tre-api.html
+--- old-stable/doc/tre-api.html	2007-11-04 20:25:00.000000000 +0200
+++ new-stable/doc/tre-api.html	2007-11-04 20:25:00.000000000 +0200
+@@ -323,22 +323,21 @@
+ <dl>
+ <dt><tt><font class="type">regoff_t</font> <font
+ class="arg">rm_so</font></tt></dt>
+-<dd>Byte offset from start of <tt><font class="arg">string</font></tt>
+-to start of substring.  </dd>
+<dd>Offset from start of <tt><font class="arg">string</font></tt> to start of
+substring.  </dd>
+ <dt><tt><font class="type">regoff_t</font> <font
+ class="arg">rm_eo</font></tt></dt>
+-<dd>Byte offset from start of <tt><font class="arg">string</font></tt>
+-to the first character after the substring.  </dd>
+<dd>Offset from start of <tt><font class="arg">string</font></tt> to the first
+character after the substring.  </dd>
+ </dl>
+ </blockquote>
+ 
+ <p>
+-The length of a submatch in bytes can be computed by subtracting
+-<code>rm_eo</code> and <code>rm_so</code>.
+-If a parenthesized subexpression did not participate in a match, the
+-<code>rm_so</code> and <code>rm_eo</code> fields for the corresponding
+-<code>pmatch</code> element are set to <code>-1</code>.
+-When a multibyte character set is in effect, the submatch offsets are
+The length of a submatch can be computed by subtracting <code>rm_so</code> from
+<code>rm_eo</code>.  If a parenthesized subexpression did not participate in a
+match, the <code>rm_so</code> and <code>rm_eo</code> fields for the
+corresponding <code>pmatch</code> element are set to <code>-1</code>.  Note
+that when a multibyte character set is in effect, the submatch offsets are
+ given as byte offsets, not character offsets.
+ </p>
+ 
+
--- a/textproc/tre/tre-0.7.5-match.patch
+++ b/textproc/tre/tre-0.7.5-match.patch
@ -0,0 +1,31 @@
+Sun Nov  4 18:47:56 EET 2007  Ville Laurikari <[email protected]>
+  * Fixed a bug in \<.
+  \< always matched at the beginning of the string.  Thanks to Shmuel
+  Zeigerman for the bug report.
+  
+  See http://laurikari.net/pipermail/tre-general/2007-February/000128.html
+diff -rN -u old-stable/lib/tre-match-utils.h new-stable/lib/tre-match-utils.h
+--- old-stable/lib/tre-match-utils.h	2007-11-04 20:30:23.000000000 +0200
+++ new-stable/lib/tre-match-utils.h	2007-11-04 20:30:23.000000000 +0200
+@@ -161,7 +161,7 @@
+        && (next_c != L'\0' || reg_noteol)				      \
+        && (next_c != L'\n' || !reg_newline))				      \
+    || ((assertions & ASSERT_AT_BOW)					      \
+-       && (pos > 0 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))))	      \
+       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))		      \
+    || ((assertions & ASSERT_AT_EOW)					      \
+        && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))		      \
+    || ((assertions & ASSERT_AT_WB)					      \
+diff -rN -u old-stable/tests/retest.c new-stable/tests/retest.c
+--- old-stable/tests/retest.c	2007-11-04 20:30:23.000000000 +0200
+++ new-stable/tests/retest.c	2007-11-04 20:30:23.000000000 +0200
+@@ -1121,6 +1121,8 @@
+   test_exec("aax xxa", 0, REG_OK, 2, 3, END);
+   test_comp("\\Bx\\b", REG_EXTENDED, 0);
+   test_exec("aax xxx", 0, REG_OK, 2, 3, END);
+  test_comp("\\<.", REG_EXTENDED, 0);
+  test_exec(";xaa", 0, REG_OK, 1, 2, END);
+ 
+   /* Shorthands for character classes. */
+   test_comp("\\w+", REG_EXTENDED, 0);
+
--- a/textproc/tre/tre-0.7.5-tre_compile.patch
+++ b/textproc/tre/tre-0.7.5-tre_compile.patch
@ -0,0 +1,137 @@
+Fri Mar 16 19:18:02 EET 2007  Ville Laurikari <[email protected]>
+  * Refactoring.
+diff -rN -u old-stable/lib/tre-compile.c new-stable/lib/tre-compile.c
+--- old-stable/lib/tre-compile.c	2007-11-04 20:27:45.000000000 +0200
+++ new-stable/lib/tre-compile.c	2007-11-04 20:27:45.000000000 +0200
+@@ -1,7 +1,7 @@
+ /*
+   tre-compile.c - TRE regex compiler
+ 
+-  Copyright (c) 2001-2006 Ville Laurikari <[email protected]>
+  Copyright (c) 2001-2007 Ville Laurikari <[email protected]>
+ 
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+@@ -127,6 +127,30 @@
+   int next_tag;
+ } tre_tag_states_t;
+ 
+
+/* Record `tag' as the start or end offset tag of each submatch listed in
+   `regset' (a list ended by a negative entry), then empty `regset'. */
+static void
+tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
+{
+  int i;
+
+  for (i = 0; regset[i] >= 0; i++)
+    {
+      int id = regset[i] / 2;		/* submatch index */
+      int start = !(regset[i] % 2);	/* even entry: start, odd: end */
+      DPRINT(("  Using tag %d for %s offset of "
+	      "submatch %d\n", tag,
+	      start ? "start" : "end", id));
+      if (start)
+	tnfa->submatch_data[id].so_tag = tag;
+      else
+	tnfa->submatch_data[id].eo_tag = tag;
+    }
+  regset[0] = -1;			/* first entry negative: list is empty */
+}
+
+
+ /* Adds tags to appropriate locations in the parse tree in `tree', so that
+    subexpressions marked for submatch addressing can be traced. */
+ static reg_errcode_t
+@@ -281,20 +305,7 @@
+ 				minimal_tag = -1;
+ 				num_minimals++;
+ 			      }
+-			    /* Go through the regset and set submatch data for
+-			       submatches that are using this tag. */
+-			    for (i = 0; regset[i] >= 0; i++)
+-			      {
+-				int id = regset[i] / 2;
+-				int start = !(regset[i] % 2);
+-				DPRINT(("  Using tag %d for %s offset of "
+-					"submatch %d\n", tag,
+-					start ? "start" : "end", id));
+-				if (start)
+-				  tnfa->submatch_data[id].so_tag = tag;
+-				else
+-				  tnfa->submatch_data[id].eo_tag = tag;
+-			      }
+			    tre_purge_regset(regset, tnfa, tag);
+ 			  }
+ 			else
+ 			  {
+@@ -394,20 +405,7 @@
+ 			    minimal_tag = -1;
+ 			    num_minimals++;
+ 			  }
+-			/* Go through the regset and set submatch data for
+-			   submatches that are using this tag. */
+-			for (i = 0; regset[i] >= 0; i++)
+-			  {
+-			    int id = regset[i] / 2;
+-			    int start = !(regset[i] % 2);
+-			    DPRINT(("  Using tag %d for %s offset of "
+-				    "submatch %d\n", tag,
+-				    start ? "start" : "end", id));
+-			    if (start)
+-			      tnfa->submatch_data[id].so_tag = tag;
+-			    else
+-			      tnfa->submatch_data[id].eo_tag = tag;
+-			  }
+			tre_purge_regset(regset, tnfa, tag);
+ 		      }
+ 
+ 		    DPRINT(("  num_tags++\n"));
+@@ -479,20 +477,7 @@
+ 			    minimal_tag = -1;
+ 			    num_minimals++;
+ 			  }
+-			/* Go through the regset and set submatch data for
+-			   submatches that are using this tag. */
+-			for (i = 0; regset[i] >= 0; i++)
+-			  {
+-			    int id = regset[i] / 2;
+-			    int start = !(regset[i] % 2);
+-			    DPRINT(("  Using tag %d for %s offset of "
+-				    "submatch %d\n", tag,
+-				    start ? "start" : "end", id));
+-			    if (start)
+-			      tnfa->submatch_data[id].so_tag = tag;
+-			    else
+-			      tnfa->submatch_data[id].eo_tag = tag;
+-			  }
+			tre_purge_regset(regset, tnfa, tag);
+ 		      }
+ 
+ 		    DPRINT(("  num_tags++\n"));
+@@ -640,23 +625,7 @@
+     } /* end while(tre_stack_num_objects(stack) > bottom) */
+ 
+   if (!first_pass)
+-    {
+-      int i;
+-      /* Go through the regset and set submatch data for
+-	 submatches that are using this tag. */
+-      for (i = 0; regset[i] >= 0; i++)
+-	{
+-	  int id = regset[i] / 2;
+-	  int start = !(regset[i] % 2);
+-	  DPRINT(("  Using tag %d for %s offset of "
+-		  "submatch %d\n", num_tags,
+-		  start ? "start" : "end", id));
+-	  if (start)
+-	    tnfa->submatch_data[id].so_tag = num_tags;
+-	  else
+-	    tnfa->submatch_data[id].eo_tag = num_tags;
+-	}
+-    }
+    tre_purge_regset(regset, tnfa, tag);
+ 
+   if (!first_pass && minimal_tag >= 0)
+     {
+