[PATCH] Updated Text::Tabs and Text::Wrap to correctly handle Unicode combining characters via \X, and added two new test modules for this functionality.

Hope this is more the style you're looking for.

--tom

---
 lib/Text/Tabs.pm                 |   46 ++++++++---
 lib/Text/TabsWrap/CHANGELOG      |    5 +
 lib/Text/TabsWrap/t/Tabs-ElCid.t |  167 +++++++++++++++++++++++++++++++++=
+++++
 lib/Text/TabsWrap/t/Wrap-JLB.t   |  142 ++++++++++++++++++++++++++++++++
 lib/Text/Wrap.pm                 |   40 ++++++---
 5 files changed, 375 insertions(+), 25 deletions(-)
 create mode 100755 lib/Text/TabsWrap/t/Tabs-ElCid.t
 create mode 100755 lib/Text/TabsWrap/t/Wrap-JLB.t

diff --git a/lib/Text/Tabs.pm b/lib/Text/Tabs.pm
index d3c06a0..7d7fb7c 100644
--- a/lib/Text/Tabs.pm
+++ b/lib/Text/Tabs.pm
@@ -7,15 +7,22 @@ require Exporter;
 @EXPORT =3D qw(expand unexpand $tabstop);
 =

 use vars qw($VERSION $tabstop $debug);
-$VERSION =3D 2009.0305;
+$VERSION =3D 2009.0417;
 =

 use strict;
 =

+use 5.010_000;
+
 BEGIN	{
 	$tabstop =3D 8;
 	$debug =3D 0;
 }
 =

+my $CHUNK =3D qr/\X/;
+
+sub _xlen(_) { scalar(() =3D $_[0] =3D~ /$CHUNK/g) } =

+sub _xpos(_) { _xlen( substr( $_[0], 0, pos($_[0]) ) ) }
+
 sub expand {
 	my @l;
 	my $pad;
@@ -24,10 +31,13 @@ sub expand {
 		for (split(/^/m, $_, -1)) {
 			my $offs =3D 0;
 			s{\t}{
-				$pad =3D $tabstop - (pos() + $offs) % $tabstop;
+			    # this works on both 5.10 and 5.11
+				$pad =3D $tabstop - (_xlen(${^PREMATCH}) + $offs) % $tabstop;
+			    # this works on 5.11, but fails on 5.10
+				#XXX# $pad =3D $tabstop - (_xpos() + $offs) % $tabstop;
 				$offs +=3D $pad - 1;
 				" " x $pad;
-			}eg;
+			}peg;
 			$s .=3D $_;
 		}
 		push(@l, $s);
@@ -44,12 +54,12 @@ sub unexpand
 	my $line;
 	my @lines;
 	my $lastbit;
-	my $ts_as_space =3D " "x$tabstop;
+	my $ts_as_space =3D " " x $tabstop;
 	for $x (@l) {
 		@lines =3D split("\n", $x, -1);
 		for $line (@lines) {
 			$line =3D expand($line);
-			@e =3D split(/(.{$tabstop})/,$line,-1);
+			@e =3D split(/(${CHUNK}{$tabstop})/,$line,-1);
 			$lastbit =3D pop(@e);
 			$lastbit =3D '' =

 				unless defined $lastbit;
@@ -91,7 +101,7 @@ sub expand
 =

 =3Dhead1 NAME
 =

-Text::Tabs -- expand and unexpand tabs per the unix expand(1) and unexpan=
d(1)
+Text::Tabs - expand and unexpand tabs like Unix expand(1) and unexpand(1)
 =

 =3Dhead1 SYNOPSIS
 =

@@ -103,11 +113,17 @@ Text::Tabs -- expand and unexpand tabs per the unix =
expand(1) and unexpand(1)
 =

 =3Dhead1 DESCRIPTION
 =

-Text::Tabs does about what the unix utilities expand(1) and unexpand(1) =

-do.  Given a line with tabs in it, expand will replace the tabs with
+Text::Tabs does most of what the Unix utilities expand(1) and unexpand(1)=
 =

+do.  Given a line with tabs in it, C<expand> replaces those tabs with
 the appropriate number of spaces.  Given a line with or without tabs in
-it, unexpand will add tabs when it can save bytes by doing so (just
-like C<unexpand -a>).  Invisible compression with plain ASCII! =

+it, C<unexpand> adds tabs when it can save bytes by doing so, =

+like the C<unexpand -a> command.  =

+
+Unlike the old Unix utilities, this module correctly accounts for
+any Unicode combining characters (such as diacriticals) that may occur
+in each line for both expansion and unexpansion.  These are overstrike
+characters that do not increment the logical position.  Make sure
+you have the appropriate Unicode settings enabled.
 =

 =3Dhead1 EXAMPLE
 =

@@ -119,14 +135,20 @@ like C<unexpand -a>).  Invisible compression with pl=
ain ASCII!
     print unexpand $_;
   }
 =

-Instead of the C<expand> comand, use:
+Instead of the shell's C<expand> command, use:
 =

   perl -MText::Tabs -n -e 'print expand $_'
 =

-Instead of the C<unexpand -a> command, use:
+Instead of the shell's C<unexpand -a> command, use:
 =

   perl -MText::Tabs -n -e 'print unexpand $_'
 =

+=3Dhead1 BUGS
+
+Text::Tabs handles only tabs (C<"\t">) and combining characters (C</\pM/>)=
.  It doesn't
+count backwards for backspaces (C<"\b">), omit other non-printing control=
 characters (C</\pC/>),
+or otherwise deal with any other zero-, half-, and full-width characters.
+
 =3Dhead1 LICENSE
 =

 Copyright (C) 1996-2002,2005,2006 David Muir Sharnoff.  =

diff --git a/lib/Text/TabsWrap/CHANGELOG b/lib/Text/TabsWrap/CHANGELOG
index df83979..fe1c1f1 100644
--- a/lib/Text/TabsWrap/CHANGELOG
+++ b/lib/Text/TabsWrap/CHANGELOG
@@ -1,3 +1,8 @@
+=3D 2009/04/17
+
+Added support for Unicode combining characters to both =

+Text::Tabs and Text::Wrap, plus a new test suite for each
+of these new functionalities.  --tchrist
 =

 =3D 2009/03/05
 =

diff --git a/lib/Text/TabsWrap/t/Tabs-ElCid.t b/lib/Text/TabsWrap/t/Tabs-E=
lCid.t
new file mode 100755
index 0000000..e26e81d
--- /dev/null
+++ b/lib/Text/TabsWrap/t/Tabs-ElCid.t
@@ -0,0 +1,167 @@
+#!perl
+
+use strict;
+use warnings "FATAL" =3D> "all";
+use Text::Tabs;
+
+require bytes;
+
+our $Errors =3D 0;
+
+our @DATA =3D (
+    [ # DATALINE #0
+	sub { die "there is no line 0" } =

+    ],
+    { # DATALINE #1
+	OLD =3D> { BYTES =3D>  71, CHARS =3D> 59, CHUNKS =3D> 47, WORDS =3D> 7, =
TABS =3D> 3 },
+	NEW =3D> { BYTES =3D>  92, CHARS =3D> 80, CHUNKS =3D> 68, WORDS =3D> 7, =
TABS =3D> 0 },
+    },
+    { # DATALINE #2
+	OLD =3D> { BYTES =3D>  45, CHARS =3D> 43, CHUNKS =3D> 41, WORDS =3D> 6, =
TABS =3D> 3 },
+	NEW =3D> { BYTES =3D>  65, CHARS =3D> 63, CHUNKS =3D> 61, WORDS =3D> 6, =
TABS =3D> 0 },
+    },
+    { # DATALINE #3
+	OLD =3D> { BYTES =3D>  47, CHARS =3D> 45, CHUNKS =3D> 43, WORDS =3D> 7, =
TABS =3D> 3 },
+	NEW =3D> { BYTES =3D>  64, CHARS =3D> 62, CHUNKS =3D> 60, WORDS =3D> 7, =
TABS =3D> 0 },
+    },
+    { # DATALINE #4
+	OLD =3D> { BYTES =3D>  49, CHARS =3D> 47, CHUNKS =3D> 45, WORDS =3D> 7, =
TABS =3D> 3 },
+	NEW =3D> { BYTES =3D>  69, CHARS =3D> 67, CHUNKS =3D> 65, WORDS =3D> 7, =
TABS =3D> 0 },
+    },
+    { # DATALINE #5
+	OLD =3D> { BYTES =3D>  83, CHARS =3D> 62, CHUNKS =3D> 41, WORDS =3D> 7, =
TABS =3D> 4 },
+	NEW =3D> { BYTES =3D> 105, CHARS =3D> 84, CHUNKS =3D> 63, WORDS =3D> 7, =
TABS =3D> 0 },
+    },
+    { # DATALINE #6
+	OLD =3D> { BYTES =3D>  55, CHARS =3D> 53, CHUNKS =3D> 51, WORDS =3D> 8, =
TABS =3D> 3 },
+	NEW =3D> { BYTES =3D>  76, CHARS =3D> 74, CHUNKS =3D> 72, WORDS =3D> 8, =
TABS =3D> 0 },
+    },
+    { # DATALINE #7
+	OLD =3D> { BYTES =3D>  42, CHARS =3D> 40, CHUNKS =3D> 38, WORDS =3D> 7, =
TABS =3D> 4 },
+	NEW =3D> { BYTES =3D>  65, CHARS =3D> 63, CHUNKS =3D> 61, WORDS =3D> 7, =
TABS =3D> 0 },
+    },
+    { # DATALINE #8
+	OLD =3D> { BYTES =3D>  80, CHARS =3D> 65, CHUNKS =3D> 52, WORDS =3D> 9, =
TABS =3D> 1 },
+	NEW =3D> { BYTES =3D>  87, CHARS =3D> 72, CHUNKS =3D> 59, WORDS =3D> 9, =
TABS =3D> 0 },
+    },
+    { # DATALINE #9
+	OLD =3D> { BYTES =3D>  43, CHARS =3D> 41, CHUNKS =3D> 41, WORDS =3D> 7, =
TABS =3D> 3 },
+	NEW =3D> { BYTES =3D>  63, CHARS =3D> 61, CHUNKS =3D> 61, WORDS =3D> 7, =
TABS =3D> 0 },
+    },
+);
+
+$| =3D 1;
+print "0..$#DATA\n";
+
+$Errors +=3D table_ok();
+check_data();
+
+if ($Errors) {
+    die "Error count: $Errors";
+} else {
+    exit(0);
+} =

+
+
+# first some sanity checks
+sub table_ok { =

+    my $bad =3D 0;
+    for my $i ( 1 .. $#DATA ) {
+
+	if ( $DATA[$i]{NEW}{TABS} ) {
+	    warn "new data should have no tabs in it at table line $i";
+	    $bad++;
+	} =

+
+	if ( $DATA[$i]{NEW}{WORDS} !=3D $DATA[$i]{OLD}{WORDS} ) {
+	    warn "word count shouldn't change upon tab expansion at table line $=
i";
+	    $bad++;
+	} =

+    } =

+    print $bad ? "not " : "", "ok 0\n";
+    return $bad;
+}
+
+sub check($$$$) {
+    die "expected 4 arguments" unless @_ =3D=3D 4;
+    my ($found, $index, $version, $item) =3D @_;
+    my $expected =3D $DATA[$index]{$version}{$item};
+    return 1 if $found =3D=3D $expected;
+    warn sprintf("%s line %d expected %d %s, found %d instead",
+		  ucfirst(lc($version)), =

+			  $index,     $expected, =

+					 lc($item),  =

+						 $found);
+    return 0;
+} =

+
+sub check_data { =

+
+    binmode(DATA, ":utf8") || die "can't binmode DATA to utf8: $!";
+    while ( my $_ =3D <DATA> ) {
+
+	my $bad =3D 0;
+
+	if ($. > $#DATA) {
+	    die "too many lines of data";
+	} =

+
+	$DATA[$.]{OLD}{DATA} =3D $_;
+
+	my($char_count,  $byte_count, $chunk_count, $word_count, $tab_count);
+
+	$byte_count  =3D bytes::length($_);
+	$char_count  =3D length();
+	$chunk_count =3D () =3D /\X/g;
+	$word_count  =3D () =3D /(?:(?=3D\pL)\X)+/g;
+	$tab_count   =3D y/\t//;
+
+	$bad++ unless check($byte_count,  $., "OLD", "BYTES");
+	$bad++ unless check($char_count,  $., "OLD", "CHARS");
+	$bad++ unless check($chunk_count, $., "OLD", "CHUNKS");
+	$bad++ unless check($word_count,  $., "OLD", "WORDS");
+	$bad++ unless check($tab_count,   $., "OLD", "TABS");
+
+	$_ =3D expand($_);
+
+	$DATA[$.]{NEW}{DATA} =3D $_;
+
+	$byte_count  =3D bytes::length($_);
+	$char_count  =3D length();
+	$chunk_count =3D () =3D /\X/g;
+	$word_count  =3D () =3D /(?:(?=3D\pL)\X)+/g;
+	$tab_count   =3D y/\t//;
+
+	$bad++ unless check($byte_count,  $., "NEW", "BYTES");
+	$bad++ unless check($char_count,  $., "NEW", "CHARS");
+	$bad++ unless check($chunk_count, $., "NEW", "CHUNKS");
+	$bad++ unless check($word_count,  $., "NEW", "WORDS");
+	$bad++ unless check($tab_count,   $., "NEW", "TABS");
+
+	$_ =3D unexpand($_);
+
+	if ($_ ne $DATA[$.]{OLD}{DATA}) {
+	    warn "expand/unexpand round-trip equivalency failed at line $.";
+	    warn sprintf("  Expected:\n%s\n%v02x\n  But got:\n%s\n%v02x\n",
+		    ( $DATA[$.]{OLD}{DATA} ) x 2, ($_) x 2 );
+	    $bad++;
+	} =

+
+	print $bad ? "not " : "", "ok $.\n";
+	$Errors +=3D $bad;
+
+    } =

+
+}
+
+
+__DATA__
+	De los sos o=CC=B2j=CC=B2o=CC=B2s=CC=B2 		tan fuertemientre l=CC=B2l=CC=B2=
o=CC=B2r=CC=B2a=CC=B2n=CC=B2d=CC=B2o=CC=B2,
+	tornava la cabec=CC=A7a		i esta=CC=81valos catando.
+	Vio puertas abiertas		e uc=CC=A7os sin can=CC=83ados,
+	alca=CC=81ndaras va=CC=81zias		sin pielles e sin mantos
+	e s=CC=B2i=CC=B2n=CC=B2 f=CC=B2a=CC=B2l=CC=B2c=CC=B2o=CC=B2n=CC=B2e=CC=B2=
s=CC=B2			e s=CC=B2i=CC=B2n=CC=B2 a=CC=B2d=CC=B2t=CC=B2o=CC=B2r=CC=B2e=CC=B2=
s=CC=B2 mudados.
+	Sospiro=CC=81 mio C=CC=A7id,		ca mucho avie grandes cuidados.
+	Fablo=CC=81 mio C=CC=A7id			bien e tan mesurado:
+       =E2=80=9Cgrado a ti=CC=81, s=CC=B3e=CC=B3n=CC=83=CC=B3o=CC=B3r=CC=B3=
 p=CC=B3a=CC=B3d=CC=B3r=CC=B3e=CC=B3,	que esta=CC=81s en alto!
+	Esto me an buelto		mis enemigos malos.=E2=80=9D
diff --git a/lib/Text/TabsWrap/t/Wrap-JLB.t b/lib/Text/TabsWrap/t/Wrap-JLB=
.t
new file mode 100755
index 0000000..384998d
--- /dev/null
+++ b/lib/Text/TabsWrap/t/Wrap-JLB.t
@@ -0,0 +1,142 @@
+#!perl
+
+use strict;
+use warnings "FATAL" =3D> "all";
+use Text::Wrap;
+
+$Text::Wrap::columns =3D 72;
+
+require bytes;
+
+our $Errors =3D 0;
+
+$/ =3D q();
+binmode(DATA, ":utf8") || die "can't binmode DATA to utf8: $!";
+
+our @DATA =3D (
+    [ # paragraph 0
+	sub { die "there is no paragraph 0" } =

+    ],
+    { # paragraph 1
+	OLD =3D> { BYTES =3D>    44, CHARS =3D>   44, CHUNKS =3D>   44, WORDS =3D=
>   7, TABS =3D>  3, LINES =3D>  4 },
+	NEW =3D> { BYTES =3D>    44, CHARS =3D>   44, CHUNKS =3D>   44, WORDS =3D=
>   7, TABS =3D>  3, LINES =3D>  4 },
+    },
+    { # paragraph 2
+	OLD =3D> { BYTES =3D>  1766, CHARS =3D> 1635, CHUNKS =3D> 1507, WORDS =3D=
> 275, TABS =3D>  0, LINES =3D>  2 },
+	NEW =3D> { BYTES =3D>  1766, CHARS =3D> 1635, CHUNKS =3D> 1507, WORDS =3D=
> 275, TABS =3D>  0, LINES =3D> 24 },
+    },
+    { # paragraph 3
+	OLD =3D> { BYTES =3D>   157, CHARS =3D>  148, CHUNKS =3D>  139, WORDS =3D=
>  27, TABS =3D>  0, LINES =3D>  2 },
+	NEW =3D> { BYTES =3D>   157, CHARS =3D>  148, CHUNKS =3D>  139, WORDS =3D=
>  27, TABS =3D>  0, LINES =3D>  3 },
+    },
+    { # paragraph 4
+	OLD =3D> { BYTES =3D>    30, CHARS =3D>   25, CHUNKS =3D>   24, WORDS =3D=
>   3, TABS =3D>  4, LINES =3D>  1 },
+	NEW =3D> { BYTES =3D>    30, CHARS =3D>   25, CHUNKS =3D>   24, WORDS =3D=
>   3, TABS =3D>  4, LINES =3D>  1 },
+    },
+);
+
+$| =3D 1;
+print "0..$#DATA\n";
+
+$Errors +=3D table_ok();
+check_data();
+
+if ($Errors) {
+    die "Error count: $Errors";
+} else {
+    exit(0);
+} =

+
+
+# first some sanity checks
+sub table_ok { =

+    my $bad =3D 0;
+    for my $i ( 1 .. $#DATA ) {
+	for my $item (qw[ bytes chars chunks words tabs ]) {
+	    if ( $DATA[$i]{NEW}{uc $item} !=3D $DATA[$i]{OLD}{uc $item} ) {
+		warn "\u$item count shouldn't change upon wrapping at table paragraph $=
i";
+		$bad++;
+	    } =

+	}
+    } =

+    print $bad ? "not " : "", "ok 0\n";
+    return $bad;
+}
+
+sub check($$$$) {
+    die "expected 4 arguments" unless @_ =3D=3D 4;
+    my ($found, $index, $version, $item) =3D @_;
+    my $expected =3D $DATA[$index]{$version}{$item};
+    return 1 if $found =3D=3D $expected;
+    warn sprintf("%s paragraph %d expected %d %s, found %d instead",
+		  ucfirst(lc($version)), =

+			  $index,     $expected, =

+					 lc($item),  =

+						 $found);
+    return 0;
+}
+
+sub check_data { =

+
+    binmode(DATA, ":utf8") || die "can't binmode DATA to utf8: $!";
+    while ( my $_ =3D <DATA> ) {
+
+	my $bad =3D 0;
+
+	if ($. > $#DATA) {
+	    die "too many paragraphs of data";
+	} =

+
+	$DATA[$.]{OLD}{DATA} =3D $_;
+
+	my($char_count,  $byte_count, $chunk_count, $word_count, $tab_count, $li=
ne_count);
+
+	$byte_count  =3D bytes::length($_);
+	$char_count  =3D length();
+	$chunk_count =3D () =3D /\X/g;
+	$word_count  =3D () =3D /(?:(?=3D\pL)\X)+/g;
+	$tab_count   =3D y/\t//;
+	$line_count  =3D y/\n//;
+
+	$bad++ unless check($byte_count,  $., "OLD", "BYTES");
+	$bad++ unless check($char_count,  $., "OLD", "CHARS");
+	$bad++ unless check($chunk_count, $., "OLD", "CHUNKS");
+	$bad++ unless check($word_count,  $., "OLD", "WORDS");
+	$bad++ unless check($tab_count,   $., "OLD", "TABS");
+	$bad++ unless check($line_count,  $., "OLD", "LINES");
+
+	my $nl =3D "\n" x chomp;
+
+	$_ =3D wrap("", "", $_) . $nl;
+
+	$byte_count  =3D bytes::length($_);
+	$char_count  =3D length();
+	$chunk_count =3D () =3D /\X/g;
+	$word_count  =3D () =3D /(?:(?=3D\pL)\X)+/g;
+	$tab_count   =3D y/\t//;
+	$line_count  =3D y/\n//;
+
+	$bad++ unless check($byte_count,  $., "NEW", "BYTES");
+	$bad++ unless check($char_count,  $., "NEW", "CHARS");
+	$bad++ unless check($chunk_count, $., "NEW", "CHUNKS");
+	$bad++ unless check($word_count,  $., "NEW", "WORDS");
+	$bad++ unless check($tab_count,   $., "NEW", "TABS");
+	$bad++ unless check($line_count,  $., "NEW", "LINES");
+
+	print $bad ? "not " : "", "ok $.\n";
+	$Errors +=3D $bad;
+
+    } =

+
+}
+
+__DATA__
+	Los dos reyes
+	     y
+	Los dos laberintos
+
+Cuentan los hombres dignos de fe (pero A=CC=B3l=CC=B3a=CC=81=CC=B3 sabe m=
a=CC=81s) que en los primeros di=CC=81as hubo un rey de l=CC=B2a=CC=B2s=CC=
=B2 i=CC=B2s=CC=B2l=CC=B2a=CC=B2s=CC=B2 d=CC=B2e=CC=B2 B=CC=B2a=CC=B2b=CC=B2=
i=CC=B2l=CC=B2o=CC=B2n=CC=B2i=CC=B2a=CC=B2 que congrego=CC=81 a sus arquit=
ectos y magos y les mando=CC=81 construir un laberinto tan perplejo y suti=
l que los varones ma=CC=81s prudentes no se aventuraban a entrar, y los qu=
e entraban se perdi=CC=81an.  Esa obra era un esca=CC=81ndalo, porque la c=
onfusio=CC=81n y la maravilla son operaciones propias de D=CC=B3i=CC=B3o=CC=
=B3s=CC=B3 y no de los hombres.  Con el andar del tiempo vino a su corte u=
n rey de los a=CC=81rabes, y el rey de B=CC=B2a=CC=B2b=CC=B2i=CC=B2l=CC=B2=
o=CC=B2n=CC=B2i=CC=B2a=CC=B2 (para hacer burla de la simplicidad de su hue=
=CC=81sped) lo hizo penetrar en el laberinto, donde vago=CC=81 afrentado y=
 confundido hasta la declinacio=CC=81n de la tarde.  Entonces imploro=CC=81=
 socorro divino y dio con la puerta.  Sus labios no profirieron queja ning=
una, pero le dijo al rey de B=CC=B2a=CC=B2b=CC=B2i=CC=B2l=CC=B2o=CC=B2n=CC=
=B2i=CC=B2a=CC=B2 que e=CC=81l en A=CC=B2r=CC=B2a=CC=B2b=CC=B2i=CC=B2a=CC=B2=
 teni=CC=81a otro laberinto y que, si D=CC=B3i=CC=B3o=CC=B3s=CC=B3 era ser=
vido, se lo dari=CC=81a a conocer algu=CC=81n =
di=CC=81a.  Luego regreso=CC=81 a A=CC=B2r=CC=B2a=CC=B2b=CC=B2i=CC=B2a=CC=B2=
, junto=CC=81 sus capitanes y sus alcaides y estrago=CC=81 l=CC=B2o=CC=B2s=
=CC=B2 r=CC=B2e=CC=B2i=CC=B2n=CC=B2o=CC=B2s=CC=B2 d=CC=B2e=CC=B2 B=CC=B2a=CC=
=B2b=CC=B2i=CC=B2l=CC=B2o=CC=B2n=CC=B2i=CC=B2a=CC=B2 con tan venturosa for=
tuna que derribo=CC=81 sus castillos, rompio=CC=81 sus gentes e hizo cauti=
vo al mismo rey.  Lo amarro=CC=81 encima de un camello veloz y lo llevo=CC=
=81 al desierto.  Cabalgaron tres di=CC=81as, y le dijo: =C2=AB=C2=A1Oh, r=
ey del tiempo y substancia y cifra del siglo!, en B=CC=B2a=CC=B2b=CC=B2i=CC=
=B2l=CC=B2o=CC=B2n=CC=B2i=CC=B2a=CC=B2 me quisiste perder en un laberinto =
de bronce con muchas escaleras, puertas y muros; ahora e=CC=B3l=CC=B3 P=CC=
=B3o=CC=B3d=CC=B3e=CC=B3r=CC=B3o=CC=B3s=CC=B3o=CC=B3 ha tenido a bien que =
te muestre el mi=CC=81o, donde no hay escaleras que subir, ni puertas que =
forzar, ni fatigosas galeri=CC=81as que recorrer, ni muros que te veden el=
 paso.=C2=BB
+
+Luego le desato=CC=81 las ligaduras y lo abandono=CC=81 en mitad del desi=
erto, donde murio=CC=81 de hambre y de sed.  La gloria sea con A=CC=B3q=CC=
=B3u=CC=B3e=CC=81=CC=B3l=CC=B3 que no muere.
+
+				=E2=80=94=E2=80=94Jorge Lui=CC=81s Borges
diff --git a/lib/Text/Wrap.pm b/lib/Text/Wrap.pm
index de86202..511a8d3 100644
--- a/lib/Text/Wrap.pm
+++ b/lib/Text/Wrap.pm
@@ -7,7 +7,7 @@ require Exporter;
 @EXPORT =3D qw(wrap fill);
 @EXPORT_OK =3D qw($columns $break $huge);
 =

-$VERSION =3D 2009.0305;
+$VERSION =3D 2009.0417;
 =

 use vars qw($VERSION $columns $debug $break $huge $unexpand $tabstop
 	$separator $separator2);
@@ -16,7 +16,7 @@ use strict;
 BEGIN	{
 	$columns =3D 76;  # <=3D screen width
 	$debug =3D 0;
-	$break =3D '\s';
+	$break =3D '(?=3D\s)\X';
 	$huge =3D 'wrap'; # alternatively: 'die' or 'overflow'
 	$unexpand =3D 1;
 	$tabstop =3D 8;
@@ -24,6 +24,12 @@ BEGIN	{
 	$separator2 =3D undef;
 }
 =

+my $CHUNK =3D qr/\X/;
+
+sub _xlen(_) { scalar(() =3D $_[0] =3D~ /$CHUNK/g) }
+
+sub _xpos(_) { _xlen( substr( $_[0], 0, pos($_[0]) ) ) }
+
 use Text::Tabs qw(expand unexpand);
 =

 sub wrap
@@ -35,14 +41,14 @@ sub wrap
 	my $tail =3D pop(@t);
 	my $t =3D expand(join("", (map { /\s+\z/ ? ( $_ ) : ($_, ' ') } @t), $ta=
il));
 	my $lead =3D $ip;
-	my $nll =3D $columns - length(expand($xp)) - 1;
+	my $nll =3D $columns - _xlen(expand($xp)) - 1;
 	if ($nll <=3D 0 && $xp ne '') {
-		my $nc =3D length(expand($xp)) + 2;
+		my $nc =3D _xlen(expand($xp)) + 2;
 		warnings::warnif "Increasing \$Text::Wrap::columns from $columns to $nc=
 to accommodate length of subsequent tab";
 		$columns =3D $nc;
 		$nll =3D 1;
 	}
-	my $ll =3D $columns - length(expand($ip)) - 1;
+	my $ll =3D $columns - _xlen(expand($ip)) - 1;
 	$ll =3D 0 if $ll < 0;
 	my $nl =3D "";
 	my $remainder =3D "";
@@ -51,17 +57,17 @@ sub wrap
 =

 	pos($t) =3D 0;
 	while ($t !~ /\G(?:$break)*\Z/gc) {
-		if ($t =3D~ /\G([^\n]{0,$ll})($break|\n+|\z)/xmgc) {
+		if ($t =3D~ /\G((?:(?=3D[^\n])\X){0,$ll})($break|\n+|\z)/xmgc) {
 			$r .=3D $unexpand =

 				? unexpand($nl . $lead . $1)
 				: $nl . $lead . $1;
 			$remainder =3D $2;
-		} elsif ($huge eq 'wrap' && $t =3D~ /\G([^\n]{$ll})/gc) {
+		} elsif ($huge eq 'wrap' && $t =3D~ /\G((?:(?=3D[^\n])\X){$ll})/gc) {
 			$r .=3D $unexpand =

 				? unexpand($nl . $lead . $1)
 				: $nl . $lead . $1;
 			$remainder =3D defined($separator2) ? $separator2 : $separator;
-		} elsif ($huge eq 'overflow' && $t =3D~ /\G([^\n]*?)($break|\n+|\z)/xmg=
c) {
+		} elsif ($huge eq 'overflow' && $t =3D~ /\G((?:(?=3D[^\n])\X)*?)($break=
|\n+|\z)/xmgc) {
 			$r .=3D $unexpand =

 				? unexpand($nl . $lead . $1)
 				: $nl . $lead . $1;
@@ -90,7 +96,9 @@ sub wrap
 =

 	print "Finish up with '$lead'\n" if $debug;
 =

-	$r .=3D $lead . substr($t, pos($t), length($t)-pos($t))
+	my($opos) =3D pos($t);
+
+	$r .=3D $lead . substr($t, pos($t), length($t) - pos($t))
 		if pos($t) ne length($t);
 =

 	print "-----------$r---------\n" if $debug;;
@@ -165,14 +173,20 @@ all subsequent lines (C<$subsequent_tab>) independen=
tly.  Please note:
 C<$initial_tab> and C<$subsequent_tab> are the literal strings that will
 be used: it is unlikely you would want to pass in a number.
 =

-Text::Wrap::fill() is a simple multi-paragraph formatter.  It formats
+C<Text::Wrap::fill()> is a simple multi-paragraph formatter.  It formats
 each paragraph separately and then joins them together when it's done.  I=
t
 will destroy any whitespace in the original text.  It breaks text into
-paragraphs by looking for whitespace after a newline.  In other respects
+paragraphs by looking for whitespace after a newline.  In other respects,
 it acts like wrap().
 =

 Both C<wrap()> and C<fill()> return a single string.
 =

+Unlike the old Unix fmt(1) utility, this module correctly accounts for
+any Unicode combining characters (such as diacriticals) that may occur
+in each line for both expansion and unexpansion.  These are overstrike
+characters that do not increment the logical position.  Make sure
+you have the appropriate Unicode settings enabled.
+
 =3Dhead1 OVERRIDES
 =

 C<Text::Wrap::wrap()> has a number of variables that control its behavior=
.
@@ -252,8 +266,8 @@ Result:
 =

 =3Dhead1 SEE ALSO
 =

-For wrapping multi-byte characters: L<Text::WrapI18N>.
-For more detailed controls: L<Text::Format>.
+For correct handling of East Asian half- and full-width characters, =

+see L<Text::WrapI18N>.  For more detailed controls: L<Text::Format>.
 =

 =3Dhead1 LICENSE
 =

-- =

1.5.6.4

0
tchrist
4/19/2009 11:40:57 PM
perl.perl5.porters 48287 articles. 1 followers. Follow

3 Replies
577 Views

Similar Articles

[PageSpeed] 24

--0015175cde82b6e8220467f51110
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 7bit

On Sun, Apr 19, 2009 at 7:40 PM, Tom Christiansen <tchrist@perl.com> wrote:

> +=head1 BUGS
> +
> +Text::Tabs handles only tabs (C<"\t">) and combining characters (C</\pM/).
>  It doesn't
> +count backwards for backspaces (C<"\t">), omit other non-printing control
> characters (C</\pC/>),
> +or otherwise deal with any other zero-, half-, and full-width characters.
> +
>

Shouldn't that be "\b" for the second "\t"?

--0015175cde82b6e8220467f51110--
0
ikegami
4/20/2009 4:36:08 AM
>On Sun, Apr 19, 2009 at 7:40 PM, Tom Christiansen <tchrist@perl.com> wrote:

>> +=head1 BUGS
>> +
>> +Text::Tabs handles only tabs (C<"\t">) and combining characters (C</\pM/).
>>  It doesn't
>> +count backwards for backspaces (C<"\t">), omit other non-printing control
>> characters (C</\pC/>),
>> +or otherwise deal with any other zero-, half-, and full-width characters.
>> +
>>

>Shouldn't that be "\b" for the second "\t"?

Yes, it should.

--tom
0
tchrist
4/20/2009 6:18:41 AM
Eric Brine wrote on Mon, 20 Apr 2009 at 00:36:08 EDT:

>On Sun, Apr 19, 2009 at 7:40 PM, Tom Christiansen <tchrist@perl.com> wrote:

>> +=head1 BUGS
>> +
>> +Text::Tabs handles only tabs (C<"\t">) and combining characters (C</\pM/).
>>  It doesn't
>> +count backwards for backspaces (C<"\t">), omit other non-printing control
>> characters (C</\pC/>),
>> +or otherwise deal with any other zero-, half-, and full-width characters.
>> +

> Shouldn't that be "\b" for the second "\t"?

I already said you're right, but I just wanted to add another followup
about all this.  DMS and I are talking about how to make this work for
earlier versions than 5.10.  I use a sub(_) proto, which I really don't
need to, but also the m//p and ${^PREMATCH} feature.  (This is how I 
learned it was set/unset at times that seemed a bit surprising.)

Because it's an amphibious module, David would reasonably like to make 
it work on earlier versions of Perl, too, and those are getting in the
way.  We'll work something out, I'm sure.

I did some benchmarking, BTW, and found that some things make a big
difference.  We may even wish to have some by-version loading of
different code-paths.  I also thought of not bothering the hard way
if the UTF-8 flag wasn't on, but per my other mail, this is still
like going on a Sunday drive with your heisenbuggy.  

Nobody has commented on that one, though.

I wish I could get something like /(?=[^\s\PC])\X/ to work better.
I also wish that we could count the old overstrike X\bX and the old
underline _\bX to count as singletons, the way less does.

--tom
0
tchrist
4/20/2009 7:30:30 PM
Reply:

Similar Articles:

[PATCH] code FIXES ++ docs ++ NEW test cases for Text::{Tabs,Wrap}
------- =_aaaaaaaaaa0 Content-Description: tchrist's preambulatory note Content-Disposition: inline Content-Type: text/plain; charset="us-ascii"; format="fixed" Content-ID: <12096.1240012019.1@chthon> Please find attached below a tarball containing: lib/Text/TabsWrap/CHANGELOG lib/Text/Tabs.pm lib/Text/Wrap.pm lib/Text/TabsWrap/t/Tabs-ElCid.t [4801 bytes] lib/Text/TabsWrap/t/Wrap-JLB.t [5553 bytes] Here's what I did: 0. started as a baseline with a git pull and build this morning, to v5.11.0 (GitLive-blead-887-g0...

superreview granted: [Bug 360869] text-anchor='middle' does not work with text content added via certain scripts : [Attachment 246072] branch patch
Robert O'Callahan (Novell) <roc@ocallahan.org> has granted T Rowley (IBM) <tor@acm.org>'s request for superreview: Bug 360869: text-anchor='middle' does not work with text content added via certain scripts https://bugzilla.mozilla.org/show_bug.cgi?id=360869 Attachment 246072: branch patch - missing chunk from bug 307155 https://bugzilla.mozilla.org/attachment.cgi?id=246072&action=edit ...

superreview requested: [Bug 360869] text-anchor='middle' does not work with text content added via certain scripts : [Attachment 246072] branch patch
T Rowley (IBM) <tor@acm.org> has asked Robert O'Callahan (Novell) <roc@ocallahan.org> for superreview: Bug 360869: text-anchor='middle' does not work with text content added via certain scripts https://bugzilla.mozilla.org/show_bug.cgi?id=360869 Attachment 246072: branch patch - missing chunk from bug 307155 https://bugzilla.mozilla.org/attachment.cgi?id=246072&action=edit ------- Additional Comments from T Rowley (IBM) <tor@acm.org> While if writing new code I'd use IsEmpty() and shuffle the code in the method a bit, I figured it was best to ...

Text does not wrap correctly in rich text control
Hi, I'm having this problem: 'Text does not wrap correctly in rich text control'. I'm using PB10.5.1. I've done some research on the Web and tried: set the following variable in the pb.ini file: [RichText] PageSizeAsControlSize=1 Should I install PB10.5.2? Does it resolve the problem? Any help would be appreciated. Thank you. Joan A bit more description about the problem. I have a rich text control in my app. I have tried to set property to 'Wordwrap, which dosen't work. I have tried to set the following variable in the pb.ini file: ...

Error after updating text in text/html module
I get this error after updating text in the text html module.The text is shown on 2 pages, and bouth pages is not showing.It is only this module instanse that is problems with...The error on the page.Control 'cfd58' referenced by the ControlToValidate property of '' cannot be validated.Or look here http://www.vandglad.dk/Default.aspx?tabid=93 What happend, and is there a way to get the module text back ??? Regards Dan Forgot to say, I am using version 3.0.12....Dan...

superreview granted: [Bug 340667] New ATK: Expose <br>'s as \n character, combining before & after blocks of text together : [Attachment 227177] Requires patch from bug 312093 to build
Robert O'Callahan (Novell) <roc@ocallahan.org> has granted Aaron Leventhal <aaronleventhal@moonset.net>'s request for superreview: Bug 340667: New ATK: Expose <br>'s as \n character, combining before & after blocks of text together https://bugzilla.mozilla.org/show_bug.cgi?id=340667 Attachment 227177: Requires patch from bug 312093 to build https://bugzilla.mozilla.org/attachment.cgi?id=227177&action=edit ...

superreview requested: [Bug 340667] New ATK: Expose <br>'s as \n character, combining before & after blocks of text together : [Attachment 227177] Requires patch from bug 312093 to build
Aaron Leventhal <aaronleventhal@moonset.net> has asked Robert O'Callahan (Novell) <roc@ocallahan.org> for superreview: Bug 340667: New ATK: Expose <br>'s as \n character, combining before & after blocks of text together https://bugzilla.mozilla.org/show_bug.cgi?id=340667 Attachment 227177: Requires patch from bug 312093 to build https://bugzilla.mozilla.org/attachment.cgi?id=227177&action=edit ...

GD::Graph::pie -- adding text with GD::Text:Wrap -- How ?
My brain is blocking something really simple here. What I want to do is add a block of text to a image created as my $graph =3D new GD::Graph::pie(1200, 1200); Could someone show me an example of adding a wrapbox of text to this image ? Thanks, -pete Pete Lancashire wrote: > My brain is blocking something really simple here. > > What I want to do is add a block of text to > a image created as > > my $graph = new GD::Graph::pie(1200, 1200); > > Could someone show me an example of adding a > wrapbox of text to this image ? ...

function to add text to one of two text boxes not working...
Hi I have the following code which doesn't work, some help would be nice:) I have a dynamically generated asp.net table and in the cells I have hyperlinks with an onclick event added I then pass a string to the javascript and I want to add it to one of the textboxes depending on their existing content. The code is .net 2 within a user control that is using a master page. I thought it might be having problems with the names that .net gives the controls but i tried using standard html input fields and had the same error. The error is 'document.aspnetForm.elements' is null or not an...

Text as text
Name: JohnA Boice Email: jaboiceathughesdotnet Product: Firefox Summary: Text as text Comments: You need to do as Safari does and make ALL text, including buttons, links, bookmarks, and general page text actual text so screen readers can interpret and voice the text. Until this occurs, your products are a real pain to use. And I have limited sight. What about those with less or no sight? Browser Details: Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.3 ...

PATCH: Update qr/\X/ documentation text
--------------040408020008040409000706 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Attached --------------040408020008040409000706 Content-Type: text/x-patch; name="0001-Fix-up-pods-for-X.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="0001-Fix-up-pods-for-X.patch" From e9b7f0f6124ec1aab1cbdfc8ce7e8484370c893d Mon Sep 17 00:00:00 2001 From: Karl Williamson <khw@khw-desktop.(none)> Date: Mon, 21 Dec 2009 11:44:35 -0700 Subject: [PATCH] Fix up pods for \X --- pod/perl...

Wrapping Text? (or should that be unwrapped text ;) )
Hello all, I'm new to mozilla and have been using it for about a week or so. Today, I visited the epinions site and was reading some health club reviews and noticed that on some pages the text of the review did not wrap to the page. On most it did, though not all. I took a look at the same pages using IE and they wrapped perfectly. Any ideas? Is there a setting I need to change? Here's a sample page: http://www.epinions.com/content_67494973060 Thanks in advance. ...

New Trigger Text Module for DNN 3.x
Hi.  I am currently using the Trigger text module on my DNN 2 site (offered at Snowcovered.com) and am looking for a similar module for DNN 3.x.Does anyone know it a module that's similar?Below is a list of features I am looking for: Content is hidden until link is clicked Show content without redirecting the user to a different tabThanks! Could you not accomplish this by setting the initial Visibility to Minimized?  They would have to click to expand the container.  And this would require neither another tab--nor a roundtrip to the server (thanks to the ClientAPI)!...

text wrap and description tag: text does not wrap if using .value ????
Hi, I have this issue with the description tag: The text wraps on 2 or more lines if the text is between the <description id="myId"> my text <description/> tags, but doesn't is I use the getElementById("myId").value = "my text that should be long enough to wrap, 30 times" in javascript. Does anyone know a work arounds ? I have tried to use <html:p id="myId" /> and getElementById("myId").innerHTML="my text that should be long enough to wrap, 30 times" bu I get an error NS_ERROR_FAILURE (would the inne...

Web resources about - [PATCH] Updated Text::Tabs and Text::Wrap to correctly handle Unicode combining characters via \X, and added two new test modules for this functionality. - perl.perl5.porters

PlayStation 2 online functionality - Wikipedia, the free encyclopedia
Selected games on Sony 's PlayStation 2 video game console offer online gaming or other online capabilities. Games that enable the feature provide ...

Free Messaging App MessageMe Latest To Lose Access To Facebook Find Friends Functionality
Facebook once again demonstrated that it does not believe imitation is the sincerest form of flattery, blocking access to its find friends functionality ...

FanAppz expands social CRM functionality, offers more insights about brand audience
... with their apps and putting those insights to use in other channels. The Personalized Marketing Platform builds upon the social CRM functionality ...

Reverting the changes to block functionality
Earlier today, we made a change to the way the “block” function of Twitter works. We have decided to revert the change after receiving feedback ...

Users want functionality without complexity with BYOD: Cortado
Mobile workers want to have access to their files while the move without the complexity, according to Cortado APAC sales manager, Brendan Samuels. ...

Apple takes password reset functionality offline after news of serious vulnerability
Earlier Friday, The Verge reported on a significant security exploit with Apple's "reset password" functionality for Apple IDs. Armed with only ...

Updated: Get Kinect-style functionality on your Mac with this free app
Flutter is a free app that brings gesture controls to your Mac via its built-in web cam. Get the download here.

Twitter bolsters search functionality - social media, social networking, Internet-based applications ...
Twitter will bolster search functionality to automatically complete users' queries and suggest related search terms, among other improvements ...

Google announces enhanced snooze functionality for Inbox by Gmail
Google today announced a useful new feature that is now available in its Inbox email app. Starting today, when users "snooze" an email that has ...

Parallels Access remote access app jumps from iPad to iPhone, gains Finder-like functionality
... capabilities of iOS devices by allowing them to serve as points of access to your Mac and Windows computers, and the intuitive and native functionality ...

Resources last updated: 12/11/2015 11:05:03 AM