//This is Open source BWT, written by Andrew Polar. //Code is not yet optimized and tested. Do not use it 'as is' before //careful testing or further notice of the author. //If you decide to use it I wish you luck. Reproduction is allowed. using System; using System.IO; namespace Browlz { //This is slightly modified quick sort, taken from //http://www.snippets.24bytes.com/2010/06/quick-sort.html public class QSortModified { public QSortModified() { } private bool isGreater(ushort[] data, int p1, int p2, int lastPosition) { //if all elements in array are equal the loop will run forever while (true) { if (data[p1] == data[p2]) { if (++p1 > lastPosition) p1 = 0; if (++p2 > lastPosition) p2 = 0; } else { break; } } return (data[p1] > data[p2]); //changing sign into opposite iverts result } private void switchElements(ref int x, ref int y) { int buffer = x; x = y; y = buffer; } private int partition(ushort[] sourceData, int[] position, int left, int right, int lastPosition) { for (int i = left; i < right; i++) { if (isGreater(sourceData, position[right], position[i], lastPosition)) { switchElements(ref position[left], ref position[i]); ++left; } } switchElements(ref position[right], ref position[left]); return left; } public void quickSort(ushort[] sourceData, int[] position, int left, int right, int lastPosition) { if (left < right) { int pivot = partition(sourceData, position, left, right, lastPosition); quickSort(sourceData, position, left, pivot - 1, lastPosition); quickSort(sourceData, position, pivot + 1, right, lastPosition); } } } class BWT { public BWT() { } public int Direct(ushort[] data) { //make array holding the positions int[] position = new int[data.Length]; for (int i = 0; i < position.Length; ++i) position[i] = i; //quick sort, keeping track of the positions QSortModified qsortM = new QSortModified(); qsortM.quickSort(data, position, 0, data.Length - 1, data.Length - 1); //make a copy of original data ushort[] copy = new ushort[data.Length]; System.Buffer.BlockCopy(data, 0, copy, 0, data.Length * sizeof(ushort)); //make output data as previous element in sorted result for (int i = 0; i < data.Length; ++i) { if ((position[i] - 1) < 0) data[i] = copy[data.Length - 1]; else data[i] = copy[position[i] - 1]; } //find position where first row moved int k = 0; while (position[k] != 0) { ++k;} return k; } //The invert BWT is implemented according to algorithm published in //http://michael.dipperstein.com/bwt/index.html public void Inverse(ushort[] data, int firstLinePosition, int dataBitSize) { //allocate map array int mapSize = 1<= 0; --i) { index = map[data[i+1]] + counts[index]; data[i] = copy[index]; } } } class Entry { static void Main(string[] args) { string fileName = "C:\\Users\\apolar\\projects\\files\\english.dic"; FileInfo fi = new FileInfo(fileName); if (!fi.Exists) { Console.WriteLine("File not found"); return; } int nSize = (int)fi.Length; if (nSize > 100000) nSize = 100000; FileStream fstream = new FileStream(fileName, FileMode.Open, FileAccess.Read); byte[] bData = new byte[nSize]; fstream.Read(bData, 0, nSize); fstream.Close(); ushort[] data = new ushort[nSize]; for (int i = 0; i < nSize; ++i) { data[i] = bData[i]; } ushort[] copy = new ushort[nSize]; System.Buffer.BlockCopy(data, 0, copy, 0, nSize * sizeof(ushort)); //BWT direct DateTime timeStamp = DateTime.Now; BWT bwt = new BWT(); int firstLinePosition = bwt.Direct(data); TimeSpan ts = DateTime.Now - timeStamp; string timeBWT = String.Format("{0:####0.###}", (double)(ts.Ticks) / (double)(TimeSpan.TicksPerSecond)); Console.WriteLine("Direct transform time {0}", timeBWT); //BWT inverse timeStamp = DateTime.Now; bwt.Inverse(data, firstLinePosition, 8); //8 is bitwise of the largest element in data ts = DateTime.Now - timeStamp; timeBWT = String.Format("{0:####0.###}", (double)(ts.Ticks) / (double)(TimeSpan.TicksPerSecond)); Console.WriteLine("Inverse transform time {0}", timeBWT); bool isOK = true; for (int i = 0; i < nSize; ++i) { if (data[i] != copy[i]) { isOK = false; break; } } if (isOK) Console.WriteLine("Round trip is OK"); else Console.WriteLine("Data mismatch"); } } }