/* * BurrowsWheeler.cs * * "Hello World" with C# 1.0, by David Bozarth * Pre-processor for run-length compression. * Adapted from C++ BWTransform.h, BWTransform.cpp by Rasmus Christian Kaae * ( from http://www.codeproject.com/KB/stl/Burrows-Wheeler_Transform.aspx ) * This is a non-templated class, since C# 1.0 doesn't have Generics. * (Will be putting in templates soon, since I just installed VS 2005.) * There is an n-squared problem in both space and time. * I will attempt to address this by either or both of the following: * - encoding cyclic permutations by just keeping indices (instead of repeating data) * - performing sequential smaller transforms. * * 26 November 2008 */ #region Using directives using System; using System.Collections; //using System.Collections.Generic; using System.IO; using System.Text; #endregion namespace Compression { public class BurrowsWheeler : ArrayList { // Vector of char public class BWTransformationCode : ArrayList { public void StdOut() { StdOut( ' ' ); } public void StdOut( char delimiter ) { for ( int i = 0; i < Count; i++ ) { char next = (char)(this)[i]; if ( next == delimiter ) { System.Console.Write( "|" ); } else { System.Console.Write( next ); } if (( i + 1 < Count ) && ( delimiter != (char)0 )) { System.Console.Write( delimiter ); } } } public int FileRead( string fileSpec, int bufSize ) { Clear(); Stream inStream = File.OpenRead( fileSpec ); byte[] bufByte = new byte[bufSize]; int nBytesRead = inStream.Read( bufByte, 0, bufSize ); while ( 0 < nBytesRead ) { for ( int i = 0; i < nBytesRead; ++i ) { Add((object) System.Convert.ToChar( bufByte[i] )); } nBytesRead = inStream.Read( bufByte, 0, bufSize ); } inStream.Close(); return Count; } public void FileWrite( string fileSpec ) { Stream outStream = File.OpenWrite( fileSpec ); byte[] bufByte = new byte[Count]; for ( int i = 0; i < Count; ++i ) { bufByte[i] = System.Convert.ToByte( (char) this[i] ); } outStream.Write( bufByte, 0, Count ); outStream.Close(); } } // class BWTransformationCode public class BWInternalVector : BWTransformationCode, IComparable { public int CompareTo( Object vin ) { BWInternalVector v = (BWInternalVector) vin; BWInternalVector x = new BWInternalVector(); BWInternalVector y = new BWInternalVector(); if ( v.Count < this.Count ) { x = v; y = this; } else { x = this; y = v; } int ans = 0; for ( int i = 0; i < x.Count; ++i ) { IComparable ox = (IComparable) x[i]; IComparable oy = (IComparable) y[i]; ans = ox.CompareTo( oy ); if ( ans != 0 ) { if ( v.Count < this.Count ) { ans = -ans ; } return ans; } } if ( v.Count < this.Count ) { return 1; } if ( this.Count < v.Count ) { return -1; } return 0; } // method CompareTo } // class BWInternalVector // Vector of vector of char public class BWInternalMatrix : ArrayList { public void Permute( ArrayList v ) { this.Clear(); for ( int i = v.Count; 0 < i ; --i ) { BWInternalVector p = new BWInternalVector(); int pos = i; for ( int j = 0; j < v.Count; ++j, ++pos ) { if ( v.Count <= pos ) pos = 0; p.Add( v[pos] ); } Add( p ); } } public BWTransformationCode GetTransformationCode() { BWTransformationCode code = new BWTransformationCode(); for (int i = 0; i < Count; i++ ) { ArrayList v = (ArrayList)(this)[i]; code.Add( v[ v.Count-1 ] ); } return code; } } // class BWInternalMatrix // methods public BurrowsWheeler( char delimiter ) { m_delimiter = delimiter; } public BurrowsWheeler() { m_delimiter = ' '; } public BWTransformationCode Transform() { // build seed ArrayList v = new ArrayList(); v.Add( m_delimiter ); v.AddRange( this ); v.Add( m_delimiter ); // permute BWInternalMatrix matrix = new BWInternalMatrix(); System.Console.WriteLine("begin Permute ... "); matrix.Permute( v ); System.Console.WriteLine("begin Sort ... "); // sort lexically matrix.Sort(); System.Console.WriteLine("finished Sort ... "); // return right most row return matrix.GetTransformationCode(); } public void InverseTransform( BWTransformationCode code ) { BWInternalMatrix matrix = new BWInternalMatrix(); // prepare for ( int i = 0; i < code.Count; i++ ) { matrix.Add( new BWInternalVector() ); } // insert data while sorting for ( int i = 0; i < matrix.Count; i++ ) { for ( int j = 0; j < code.Count; j++ ) { BWInternalVector current = (BWInternalVector) matrix[j]; current.Insert( 0, code[j] ); } matrix.Sort(); } // find result Clear(); for ( int i = 0; i < matrix.Count; i++ ) { BWInternalVector v = (BWInternalVector) matrix[i]; if ( (char) v[0] == m_delimiter && (char) v[ matrix.Count-1 ] == m_delimiter ) { for ( int j = 1; (j + 1) < matrix.Count; j++ ) { Add( v[j] ); } return; // terminate } } } // method InverseTransform public void StdOut() { StdOut( ' ' ); } public void StdOut( char delimiter ) { BWTransformationCode code = new BWTransformationCode(); code.AddRange( this ); code.StdOut( delimiter ); } public int FileRead( string fileSpec, int bufSize ) { Clear(); BWTransformationCode code = new BWTransformationCode(); int bytesRead = code.FileRead( fileSpec, bufSize ); AddRange( code ); return bytesRead; } public void FileWrite( string fileSpec ) { BWTransformationCode code = new BWTransformationCode(); code.AddRange( this ); code.FileWrite( fileSpec ); } // data protected char m_delimiter; public class Test { const string FILE_SPEC_ORG = @"..\..\original.txt"; const string FILE_SPEC_COM = @"..\..\compressed.txt"; const string FILE_SPEC_DEC = @"..\..\decompressed.txt"; const int BUF_RD_SZ = 1024; [STAThread] static void Main(string[] args) { BurrowsWheeler bw = new BurrowsWheeler( (char)127 ); int bytesRead = bw.FileRead( FILE_SPEC_ORG, BUF_RD_SZ ); System.Console.WriteLine( "Transforming ... "); BWTransformationCode d = bw.Transform(); System.Console.WriteLine( "Finished transforming ... "); d.FileWrite( FILE_SPEC_COM ); BurrowsWheeler wd = new BurrowsWheeler( (char)127 ); System.Console.WriteLine("Inverse Transforming ... "); wd.InverseTransform( d ); System.Console.WriteLine("Finished inverse transforming ... "); wd.FileWrite( FILE_SPEC_DEC ); System.Console.WriteLine("Hello, world!"); } // method Main } // class Test } // class BurrowsWheeler } // namespace Compress