// Prefixer.cpp : Defines the entry point for the console application. // (C) 2010, Andrew Polar under GPL ver. 3. // Released April, 2010 // // LICENSE // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 3 of // the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details at // Visit . // #include #include #include #include #include unsigned long long bit_holding_buffer64; unsigned char* global_pointer_to_data_buffer; unsigned char bit_counter; unsigned int current_byte; ///////auxiliary functions/////////////////////////////// template double calculate_entropy(T* data, int data_size) { int min, max, counter, buffer_size; int* buffer; double entropy; double log2 = log(2.0); double prob; min = data[0]; max = data[0]; for (counter=0; counter max) max = data[counter]; } buffer_size = max - min + 1; buffer = (int*)malloc(buffer_size * sizeof(int)); memset(buffer, 0x00, buffer_size * sizeof(int)); for (counter=0; counter 0) { prob = (double)buffer[counter]; prob /= (double)data_size; entropy += log(prob)/log2*prob; } } entropy *= -1.0; if (buffer) free(buffer); return entropy; } int getFileSize(char* fileName) { FILE* f; if ((f = fopen(fileName, "rb")) == NULL) return -1; fseek(f, 0, SEEK_END); int bytes = ftell(f); fclose(f); return bytes; } //returns the bitlength of passed number template static __inline int bitLen(AllIntegers n) { if (n < 0) n = -n; int cnt = 1; while ((n >>= 1) > 0) ++cnt; return cnt; } //////end auxiliary functions list////////////////// //input output static __inline int readBits(int max_len) { if ((bit_counter - max_len) < 0) { bit_holding_buffer64 <<= 8; bit_holding_buffer64 |= global_pointer_to_data_buffer[current_byte++]; bit_holding_buffer64 <<= 8; bit_holding_buffer64 |= global_pointer_to_data_buffer[current_byte++]; bit_holding_buffer64 <<= 8; bit_holding_buffer64 |= global_pointer_to_data_buffer[current_byte++]; bit_holding_buffer64 <<= 8; bit_holding_buffer64 |= global_pointer_to_data_buffer[current_byte++]; bit_counter += 32; } return (int)(bit_holding_buffer64 >> (bit_counter - max_len)) & ((1 << max_len)-1); } static __inline void writeBits(int codeLen, int code) { bit_holding_buffer64 <<= codeLen; bit_holding_buffer64 |= code; bit_counter += codeLen; if (bit_counter > 31) { unsigned char offset = bit_counter - 32; global_pointer_to_data_buffer[current_byte++] = unsigned char(bit_holding_buffer64 >> (offset + 24)); global_pointer_to_data_buffer[current_byte++] = unsigned char(bit_holding_buffer64 >> (offset + 16)); global_pointer_to_data_buffer[current_byte++] = unsigned char(bit_holding_buffer64 >> (offset + 8)); global_pointer_to_data_buffer[current_byte++] = unsigned char(bit_holding_buffer64 >> offset); bit_counter -= 32; } } //end input output class Prefixer { public: Prefixer(); ~Prefixer(); bool Initialize (short AlphabetSize, bool isDecoding); void ReleaseMemory(); void EncodeSymbol (short value); short DecodeSymbol (); void FlushBits (); private: short GetSymbol (short value); short InvertSymbol (short value); void UpdateTables (short value); void BuildTree (bool isDecoding); bool allocateDecodingTables(); void freeDecodingTables(); int *m_fragment_len, *m_shift, *m_lowest_code, *m_offset, *m_code; short *m_frequency, *m_symbol, *m_inverse; unsigned char *m_codeLen; int m_alphabet_size, m_max_code_len, m_tableSize, m_counter, m_size_of_adaptation; int m_max_frequency_limit, m_min_size_of_adaptation, m_max_size_of_adaptation; }; bool Prefixer::Initialize(short AlphabetSize, bool isDecoding) { m_alphabet_size = AlphabetSize; m_frequency = m_symbol = m_inverse = 0; m_frequency = (short*) malloc(m_alphabet_size * sizeof(*m_frequency)); m_symbol = (short*) malloc(m_alphabet_size * sizeof(*m_symbol)); m_inverse = (short*) malloc(m_alphabet_size * sizeof(*m_inverse)); m_codeLen = (unsigned char*) malloc(m_alphabet_size * sizeof(*m_codeLen)); m_code = (int*) malloc(m_alphabet_size * sizeof(*m_code)); if (m_frequency == 0) return false; if (m_symbol == 0) return false; if (m_inverse == 0) return false; if (m_codeLen == 0) return false; if (m_code == 0) return false; for (int i=0; i= m_max_frequency_limit) { for (int j=0; j>= 1; } } if (m_symbol[value] == 0) { return; } short npos = -1; short pos = m_symbol[value]; short freq = m_frequency[value]; for (short i=pos-1; i>=0; --i) { if (freq > m_frequency[m_inverse[i]]) { npos = i; } else { break; } } if (npos < 0) { return; } short buf = m_symbol[m_inverse[pos]]; m_symbol[m_inverse[pos]] = m_symbol[m_inverse[npos]]; m_symbol[m_inverse[npos]] = buf; buf = m_inverse[pos]; m_inverse[pos] = m_inverse[npos]; m_inverse[npos] = buf; } short Prefixer::GetSymbol(short value) { return m_symbol[value]; } short Prefixer::InvertSymbol(short value) { return m_inverse[value]; } void Prefixer::BuildTree(bool isDecoding) { short* tmpFreq = (short*)malloc(m_alphabet_size * sizeof(*tmpFreq)); for (int i=0; i>= 1) > tmpFreq[i]) { ++m_codeLen[i]; } if (m_codeLen[i] > m_max_code_len) { m_max_code_len = m_codeLen[i]; } } //find codes m_code[0] = 0; for (int i=1; i= m_size_of_adaptation) { BuildTree(false); m_counter = 0; if (m_size_of_adaptation < m_max_size_of_adaptation) m_size_of_adaptation <<= 1; } } void Prefixer::FlushBits() { if (bit_counter > 0) { writeBits(32-bit_counter, 0); } } short Prefixer::DecodeSymbol() { int address = readBits(m_max_code_len); short symbol = 0; bool isFound = false; for (int i=0; i> m_shift[i]) - m_lowest_code[i]; if (pos < m_fragment_len[i]) { symbol = InvertSymbol(pos + m_offset[i]); bit_counter -= m_max_code_len - m_shift[i]; isFound = true; break; } } if (!isFound) { printf("Error decoding\n"); } UpdateTables(symbol); ++m_counter; if (m_counter >= m_size_of_adaptation) { BuildTree(true); m_counter = 0; if (m_size_of_adaptation < m_max_size_of_adaptation) m_size_of_adaptation <<= 1; } return symbol; } class ContextManager { public: ContextManager(int alphabet_size, bool isDecoding); ~ContextManager(); void EncodeSymbol(short symbol); void CompleteAndFlushBits(); short DecodeSymbol(); private: Prefixer* m_ptr_prefixer; int m_context_size, m_context_bit_len, m_alphabet_size; int m_context, m_shift, m_mask; }; ContextManager::ContextManager(int alphabet_size, bool isDecoding) { m_alphabet_size = alphabet_size; m_context_bit_len = 11; m_context_size = 1<EncodeSymbol(data[i]); } coder->CompleteAndFlushBits(); delete coder; clock_t end = clock(); printf("Encoding, time %2.3f s.\n", (double)(end - start)/CLOCKS_PER_SEC); int encoded_size = current_byte; //end encoding //decoding start = clock(); unsigned char* test = (unsigned char*)malloc((data_size + data_size/4) * sizeof(*test)); ContextManager* decoder = new ContextManager(256, true); int test_size = 0; while (true) { short value = decoder->DecodeSymbol(); if (value == 256) break; test[test_size++] = (unsigned char)value; } delete decoder; end = clock(); printf("Decoding, time %2.3f s.\n", (double)(end - start)/CLOCKS_PER_SEC); //end decoding bool isOK = true; if (test_size != data_size) { printf("Data mismatch %d %d\n", test_size, data_size); isOK = false; } else { for (int i=0; i