hostile-takeover/SpiffLib/compressor.cs
2014-07-06 17:47:28 -07:00

318 lines
7.4 KiB
C#

using System;
using System.IO;
using System.Collections;
using System.Diagnostics;
namespace SpiffLib {
enum CodeType { Literal, Match, Run, End };
struct Code {
public byte[] ab;
public int ibSrc;
public int cbSrc;
public int cBitsSavings;
public CodeType cotype;
};
public class Compressor {
byte[] m_abSrc;
ArrayList[] m_mpbalsIndexes = new ArrayList[256];
public static byte[] CompressChunk(byte[] abChunk) {
#if false
return abChunk;
#else
Compressor comp = new Compressor();
return comp.Compress(abChunk);
#endif
}
public Compressor() {
}
void PushIndexes(int ib, int cb) {
while (cb-- != 0) {
byte b = m_abSrc[ib];
if (m_mpbalsIndexes[b] == null)
m_mpbalsIndexes[b] = new ArrayList();
m_mpbalsIndexes[b].Add(ib);
ib++;
}
}
void PopIndexes(int ib, int cb) {
ib += cb - 1;
while (cb-- != 0) {
byte b = m_abSrc[ib];
ArrayList alsT = m_mpbalsIndexes[b];
Debug.Assert(ib == (int)alsT[alsT.Count - 1]);
alsT.RemoveAt(alsT.Count - 1);
ib--;
}
}
Code[] FindMatchCodes(int ibSrc) {
ArrayList alsCode = new ArrayList();
ArrayList alsIndexes = m_mpbalsIndexes[m_abSrc[ibSrc]];
if (alsIndexes != null) {
// Find the longest match.
int ibLongest = -1;
int cbLongest = 0;
for (int iIndex = alsIndexes.Count - 1; iIndex >= 0; iIndex--) {
// The largest index is 16383.
int ibStart = (int)alsIndexes[iIndex];
if (ibSrc - ibStart > 16383)
break;
// Check length of match.
Debug.Assert(m_abSrc[ibSrc] == m_abSrc[ibStart]);
int c = 1;
int cbT = (ibSrc - ibStart) < (m_abSrc.Length - ibSrc) ? (ibSrc - ibStart) : (m_abSrc.Length - ibSrc);
for (int ibT = ibStart + 1; ibT < ibStart + cbT; ibT++) {
if (m_abSrc[ibT] != m_abSrc[ibSrc + (ibT - ibStart)])
break;
c++;
// Max length allowed is 129
if (c == 129)
break;
}
// The match has to be >= 2 to be useful
if (c < 2)
continue;
// Remember this match if it is longer than what we know about
// already
if (c > cbLongest) {
cbLongest = c;
ibLongest = ibStart;
}
}
// Add best known matches
if (ibLongest != -1) {
Code code;
int ib = ibSrc - ibLongest;
int cb = cbLongest;
if (cb > 8 || ib >= 8192) {
// 3 bits old count (must be 0 since this is escape code)
// 7 bits count (0-127 translates to 2-129 count)
// 14 bits index (1-16383, 0 means end if count is also 0)
Debug.Assert(ib <= 16384);
Debug.Assert(cb <= 129);
code.ab = new byte[3];
code.ab[0] = (byte)(ib >> 9);
code.ab[1] = (byte)((ib >> 1) & 0xff);
code.ab[2] = (byte)((ib << 7) | (cb - 2));
} else {
// 3 bits count (1-7 translates to 2-8, 0 reserved for escape code)
// 13 bits index (1-8191, 0 means end if count is also 0)
Debug.Assert(ib < 8192);
Debug.Assert(cb <= 8);
code.ab = new byte[2];
code.ab[0] = (byte)(((cb - 1) << 5) | (ib >> 8));
code.ab[1] = (byte)ib;
}
code.cotype = CodeType.Match;
code.ibSrc = ibSrc;
code.cbSrc = cb;
code.cBitsSavings = (code.cbSrc - code.ab.Length) * 8;
alsCode.Add(code);
}
}
// Add literal to round out the possibilities
Code codeLiteral;
codeLiteral.ab = new byte[1];
codeLiteral.ab[0] = m_abSrc[ibSrc];
codeLiteral.cBitsSavings = 0;
codeLiteral.cbSrc = 1;
codeLiteral.ibSrc = ibSrc;
codeLiteral.cotype = CodeType.Literal;
alsCode.Add(codeLiteral);
// Return them all
return (Code[])alsCode.ToArray(typeof(Code));
}
Code FindBestCode(int ibSrc, int cLookAhead) {
// First find all possible Code at this point, independent of savings
ArrayList alsCode = new ArrayList();
alsCode.AddRange(FindMatchCodes(ibSrc));
// See which approach yields the smallest size over the next cLookAhead
// encodings. It is easy to get different results here. The larger
// cLookAhead is the better (but slower)
int icodeBest = 0;
int cBitsSavingsBest = Int32.MinValue;
for (int i = 0; i < alsCode.Count; i++) {
Code code = (Code)alsCode[i];
int cBitsSavingsT = code.cBitsSavings;
// Look ahead if asked to
if (cLookAhead != 0 && ibSrc + code.cbSrc < m_abSrc.Length) {
int ibSrcT = ibSrc + code.cbSrc;
PushIndexes(ibSrc, code.cbSrc);
for (int iT = 0; iT < cLookAhead; iT++) {
Code codeT = FindBestCode(ibSrcT, 0);
PushIndexes(ibSrcT, codeT.cbSrc);
ibSrcT += codeT.cbSrc;
cBitsSavingsT += codeT.cBitsSavings;
if (ibSrcT >= m_abSrc.Length)
break;
}
PopIndexes(ibSrc, ibSrcT - ibSrc);
}
// Check against best so far
if (cBitsSavingsT > cBitsSavingsBest) {
cBitsSavingsBest = cBitsSavingsT;
icodeBest = i;
}
}
// Get and return the best
return (Code)alsCode[icodeBest];
}
private Code[] Codify(int ibSrc) {
// Get the codes for this stream
ArrayList alsCodes = new ArrayList();
while (ibSrc < m_abSrc.Length) {
Code code = FindBestCode(ibSrc, 0); // 16);
PushIndexes(ibSrc, code.cbSrc);
alsCodes.Add(code);
ibSrc += code.cbSrc;
}
// Add the End code and done
Code codeEnd;
codeEnd.ab = new byte[2];
codeEnd.ab[0] = (byte)0;
codeEnd.ab[1] = (byte)0;
codeEnd.cBitsSavings = 0;
codeEnd.cbSrc = 0;
codeEnd.ibSrc = 0;
codeEnd.cotype = CodeType.End;
alsCodes.Add(codeEnd);
return (Code[])alsCodes.ToArray(typeof(Code));
}
public byte[] Compress(byte[] abSrc) {
// Create compression codes
m_abSrc = abSrc;
Code[] acode = Codify(0);
// Turn into a byte stream
ArrayList alsDst = new ArrayList();
ArrayList alsDstT = new ArrayList();
int ibitFlags = 7;
byte bFlags = 0;
for (int icode = 0; icode < acode.Length; icode++) {
Code code = acode[icode];
if (code.cotype == CodeType.Literal)
bFlags |= (byte)(1 << ibitFlags);
alsDstT.AddRange(code.ab);
ibitFlags--;
if (ibitFlags < 0 || icode == acode.Length - 1) {
ibitFlags = 7;
alsDst.Add(bFlags);
alsDst.AddRange((byte[])alsDstT.ToArray(typeof(byte)));
bFlags = 0;
alsDstT.Clear();
}
}
// Done
return (byte[])alsDst.ToArray(typeof(byte));
}
public static byte[] DecompressChunk(byte[] abChunk) {
ArrayList alsDst = new ArrayList();
int ib = 0;
bool fDone = false;
while (!fDone) {
// Get next flags
byte bFlags = abChunk[ib];
ib++;
for (int ibitFlags = 7; ibitFlags >= 0; ibitFlags--) {
// Literal or code?
if ((bFlags & (1 << ibitFlags)) != 0) {
alsDst.Add(abChunk[ib]);
ib++;
continue;
}
// Get a code
ushort code = (ushort)(abChunk[ib] << 8);
ib++;
code |= (ushort)abChunk[ib];
ib++;
// Extended code? Check for count == 0.
int ibBackwards = (int)(code & 0x1fff);
int cb;
if ((code & 0xe000) != 0) {
// Count != 0, so not an extended code
cb = (byte)((code & 0xe000) >> 13) + 1;
} else {
// Extended code. End?
if (ibBackwards == 0) {
fDone = true;
break;
}
// Extended match
ibBackwards = (int)(code << 1) | (int)(abChunk[ib] >> 7);
cb = (int)(abChunk[ib] & 0x7f) + 2;
ib++;
}
// Copy this chunk into the output
byte[] abAdd = new byte[cb];
alsDst.CopyTo(alsDst.Count - ibBackwards, abAdd, 0, cb);
alsDst.AddRange(abAdd);
}
}
return (byte[])alsDst.ToArray(typeof(byte));
}
}
}