// RunCoder1.cpp : Defines demo for adaptive order 1 range coder. // (C) 2009, Andrew Polar under GPL ver. 3. // First released Feb. 10, 2009. // Latest version Mar. 30, 2009. // // LICENSE // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 3 of // the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details at // Visit . // #include #include #include #include #include FILE* out = 0; FILE* in = 0; //changeable parameters const unsigned char Length_of_context_buffer = 12; //changing length require change in get_selected_context function int Length_of_adaptation_buffer = 4096; //Globals unsigned MAX_BASE = 15; //can be changed but not recommended, may work but may become unstable int current_byte; //position of current byte in result data unsigned char current_bit; //postion of current bit in result data unsigned long long int operation_buffer64; int operation_buffer32; int product, shift; unsigned char in_byte, out_byte; int result_size; //Some look up tables for fast processing static int output_mask[8][32]; static unsigned char bytes_plus [8][64]; static unsigned char edge_mask[8] = {0xff, 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01}; static unsigned char shift_table[8] = {24, 16, 8, 0}; unsigned long long int overflow_indicator = (0xffffffffffffffffULL) << (MAX_BASE * 2); int product_mask = 0xFFFFFFFF >> (32 - MAX_BASE); ////////////////////////////////////////////////////////////// //auxiliary function double entropyBytes(unsigned char* data, int size) { double log2 = log(2.0); int* buffer = (int*)malloc(256*sizeof(int)); memset(buffer, 0x00, 256*sizeof(int)); for (int counter=0; counter 0) { double prob = (double)buffer[counter]; prob /= (double)size; entropy += log(prob)/log2*prob; } } entropy *= -1.0; if (buffer) free(buffer); return entropy; } ////////////////////////////////////////////////// //this function should be calle prior to anything else void make_look_ups(int alphabet_size) { int sign_mask[8] = { 0xffffffff, 0x7fffffff, 0x3fffffff, 0x1fffffff, 0x0fffffff, 0x07ffffff, 0x03ffffff, 0x01ffffff }; for (int i=0; i<8; i++) { for (int j=0; j<32; j++) { output_mask[i][j] = sign_mask[i]; output_mask[i][j] >>= (32-i-j); output_mask[i][j] <<= (32-i-j); } } for (int i=0; i<8; i++) { for (int j=0; j<64; j++) { bytes_plus[i][j] = j/8; if ((i + j%8) > 7) ++bytes_plus[i][j]; } } } //The key function that takes product = product*y and convert it to //mantissa and exponent. Mantissa have number of bits equal to //length of the product_mask. inline void SafeProduct(int y) { int p = product * y; if ((p >> shift) >product_mask) { p >>= shift; while (p > product_mask) { p >>= 1; ++shift; } } else { while (shift >= 0) { --shift; if ((p >> shift) > product_mask) { break; } } ++shift; p >>= shift; } product = p; } inline void readBits(int bits) { unsigned end_byte = current_byte + bytes_plus[current_bit][bits]; int buffer = (in_byte & edge_mask[current_bit]); unsigned char total_bits = 8 - current_bit; for (unsigned i=current_byte+1; i<=end_byte; ++i) { in_byte = getc(in); (buffer <<= 8) |= in_byte; total_bits += 8; } buffer >>= (total_bits - bits); operation_buffer32 <<= bits; operation_buffer32 |= buffer; current_byte = end_byte; current_bit = (bits + current_bit)%8; if (current_byte >= result_size) { printf("Abnormal termination, attempt to read data out of the data buffer\n"); exit(1); } } inline void writeBits(int bits) { int buffer = (int)((operation_buffer64 >> current_bit) >> 32); buffer &= output_mask[current_bit][bits]; unsigned bytes_plus2 = bytes_plus[current_bit][bits]; current_bit = (bits + current_bit)%8; for (unsigned i=0; i> shift_table[i]); putc(out_byte, out); out_byte = 0; } out_byte |= (buffer >> shift_table[bytes_plus2]); } class Predictor { public: Predictor(int History, int Prediction); ~Predictor(); void SetUpInitial(); void UpdateTables(int address, unsigned char value); unsigned char GetSymbol(int address, unsigned char value); unsigned char InvertSymbol(int address, unsigned char value); void PrintStatistics(); private: unsigned char** frequency; unsigned char** symbol; unsigned char** inverse; int history_size, predict_size; }; Predictor::Predictor(int History, int Prediction) { history_size = History; predict_size = Prediction; frequency = (unsigned char**) malloc(history_size*sizeof(unsigned char*)); symbol = (unsigned char**) malloc(history_size*sizeof(unsigned char*)); inverse = (unsigned char**) malloc(history_size*sizeof(unsigned char*)); for (int i=0; i>= 1; } } if (symbol[address][value] == 0) { return; } unsigned char pos = symbol[address][value]; unsigned int Freq = frequency[address][value]; int npos = -1; for (int i=pos-1; i>=0; --i) { if (Freq > frequency[address][inverse[address][i]]) { npos = i; } else { break; } } if (npos < 0) { return; } unsigned char buf = symbol[address][inverse[address][pos]]; symbol[address][inverse[address][pos]] = symbol[address][inverse[address][npos]]; symbol[address][inverse[address][npos]] = buf; buf = inverse[address][pos]; inverse[address][pos] = inverse[address][npos]; inverse[address][npos] = buf; } unsigned char Predictor::GetSymbol(int address, unsigned char value) { return symbol[address][value]; } unsigned char Predictor::InvertSymbol(int address, unsigned char value) { return inverse[address][value]; } void Predictor::PrintStatistics() { for (int i=0; i max) { //we need correction, Total should not be > max, that rarely or never happen. i = 2; while (iTotal > max) { if (normalfrequency[i] > 1) { --normalfrequency[i]; --iTotal; } ++i; if (i == alphabetsize) i = 2; } } //End of normalization //Making cumulative frequencies cumulative[0] = 0; for (i=1; i= memorylimit) { for (int i=0; i>= 1; } } } void RunCoder::ProcessSymbol(int value) { while (((operation_buffer64 << (MAX_BASE - shift)) & overflow_indicator) == overflow_indicator) { printf("Possible buffer overflow is corrected at value %d\n", value); //this is zero flag output in order to avoid buffer overflow //rarely happen, cumulative[0] = 0, frequency[0] = 1 writeBits(MAX_BASE-shift); operation_buffer64 <<= (MAX_BASE - shift); operation_buffer64 += cumulative[0] * product; SafeProduct(normalfrequency[0]); //in result of this operation shift = 0 } writeBits(MAX_BASE-shift); operation_buffer64 <<= (MAX_BASE - shift); operation_buffer64 += cumulative[value+2] * product; SafeProduct(normalfrequency[value+2]); } void RunCoder::FlushRemainedBuffer() { while (((operation_buffer64 << (MAX_BASE - shift)) & overflow_indicator) == overflow_indicator) { printf("Possible buffer overflow is corrected at value\n"); //this is zero flag output in order to avoid buffer overflow //rarely happen, cumulative[0] = 0, frequency[0] = 1 writeBits(MAX_BASE-shift); operation_buffer64 <<= (MAX_BASE - shift); operation_buffer64 += cumulative[0] * product; SafeProduct(normalfrequency[0]); //in result of this operation shift = 0 } writeBits(MAX_BASE-shift); operation_buffer64 <<= (MAX_BASE - shift); operation_buffer64 += cumulative[1] * product; SafeProduct(normalfrequency[1]); // //flushing remained 64 bits writeBits(24); operation_buffer64 <<= 24; writeBits(24); operation_buffer64 <<= 24; writeBits(16); operation_buffer64 <<= 16; } int RunCoder::DecodeSymbol() { readBits(MAX_BASE-shift); int ID = operation_buffer32/product; operation_buffer32 -= product * cumulative[symbol[ID]]; SafeProduct(normalfrequency[symbol[ID]]); return symbol[ID]; } //End RunCoder ///////////////////////////////////////////////////// int getFileSize(char* fileName) { FILE* f; if ((f = fopen(fileName, "rb")) == NULL) return -1; fseek(f, 0, SEEK_END); int bytes = ftell(f); fclose(f); return bytes; } inline int get_selected_context(int memory_buffer) { return (memory_buffer & 0xFF) | ((memory_buffer & 0xF000)>>4); } void Encode(int data_size) { make_look_ups(256); const int context_size = 1<Initialize(256) == false) { printf("Initialization failed\n"); exit(1); } encoderdefault->SetDefaultFrequencies(); encoderdefault->Renormalize(false); Predictor* pp = new Predictor(context_size, 256); operation_buffer64 = 0; product = 1; shift = 0; current_byte = 0; current_bit = 0; int context = 0; int context_mask = context_size-1; unsigned char symbol; for (int i=0; iGetSymbol(context, in_byte); pp->UpdateTables(context, in_byte); //end encoderdefault->ProcessSymbol(symbol); encoderdefault->UpdateRawFrequency(symbol, false); context <<= 8; context |= in_byte; //context &= context_mask; context = get_selected_context(context); //a little bit more intelligent context construction } //last step needs output of remained buffer in_byte = getc(in); symbol = pp->GetSymbol(context, in_byte); pp->UpdateTables(context, in_byte); encoderdefault->ProcessSymbol(symbol); encoderdefault->UpdateRawFrequency(symbol, false); encoderdefault->FlushRemainedBuffer(); //we have to write one last byte putc(out_byte, out); delete pp; delete encoderdefault; } void Decode() { in_byte = getc(in); make_look_ups(256); const int context_size = 1<Initialize(256) == false) { printf("Unexpected bug\n"); exit(1); } decoderdefault->SetDefaultFrequencies(); decoderdefault->Renormalize(true); Predictor* pp = new Predictor(context_size, 256); operation_buffer32 = 0; product = 1; shift = 0; current_byte = 0; current_bit = 0; int cnt = 0; int value; readBits(32); readBits(32); int context = 0; int context_mask = context_size-1; unsigned char symbol; while (true) { value = decoderdefault->DecodeSymbol(); if (value == 1) { //printf("Termination flag is recognized\n"); break; } if (value != 0) { //0 must be ignored value -= 2; decoderdefault->UpdateRawFrequency(value, true); //revert alphabet symbol = pp->InvertSymbol(context, value); pp->UpdateTables(context, symbol); //end context <<= 8; context |= symbol; //context &= context_mask; context = get_selected_context(context); putc(symbol, out); } } delete pp; delete decoderdefault; } int main(int argc, char** argv) { if (argc != 4) { printf("RunCoder1 (C) 2009, Andrew Polar\n"); printf("This is free software under GPL, http://www.gnu.org/copyleft/gpl.html\n\n"); printf("Usage for compression : runcoder1 c source.bmp result.dat\n"); printf("Usage for decompression: runcoder1 d result.dat test.bmp\n"); exit(0); } if (argv[1][0] == 'c') { clock_t start_encoding = clock(); printf("Reading the data ...\n\n"); int data_size = getFileSize(argv[2]); if (data_size < 0) { printf("File %s not found\n", argv[2]); return 1; } in = fopen(argv[2], "rb"); printf("Original size %10d bytes\n", data_size); out = fopen(argv[3], "wb"); Encode(data_size); fflush(out); fclose(out); fclose(in); int result_size = getFileSize(argv[3]); printf("Actual encoded size for adaptive coder %10d bytes\n\n", result_size); clock_t end_encoding = clock(); printf("Time for encoding %2.3f sec.\n", (double)(end_encoding - start_encoding)/CLOCKS_PER_SEC); double ratio = (double)result_size; ratio /= (double)data_size; printf("Compression ratio %2.3f of original size.\n\n", ratio); } else if (argv[1][0] == 'd') { clock_t start_decoding = clock(); printf("Reading the data ...\n\n"); result_size = getFileSize(argv[2]); if (result_size < 0) { printf("File %s not found\n", argv[2]); return 1; } printf("Compressed file size %10d bytes\n", result_size); in = fopen(argv[2], "rb"); out = fopen(argv[3], "wb"); Decode(); fflush(out); fclose(out); fclose(in); clock_t end_decoding = clock(); printf("Decompression is ended, time is %2.3f sec.\n\n", (double)(end_decoding - start_decoding)/CLOCKS_PER_SEC); } else { printf("Misformatted command.\n\n"); } return 0; }