#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
/* -------- aux stuff ---------- */
/* Allocate a zero-filled array of n_item elements of item_size bytes each.
 *
 * The allocation starts with a two-word header (item_size, then n_item);
 * the returned pointer addresses the first element, just past that header.
 * Terminates the program if the byte count would overflow size_t or if the
 * allocation fails, so the result is never NULL. */
void* mem_alloc(size_t item_size, size_t n_item)
{
    const size_t header = sizeof(size_t) * 2;
    size_t *x;

    /* refuse requests whose total size would wrap around */
    if (item_size != 0 && n_item > ((size_t)-1 - header) / item_size) {
        fprintf(stderr, "mem_alloc: size overflow\n");
        abort();
    }
    x = (size_t*)calloc(1, header + n_item * item_size);
    if (!x) {
        fprintf(stderr, "mem_alloc: out of memory\n");
        abort();
    }
    x[0] = item_size;
    x[1] = n_item;
    return x + 2;
}
/* Resize an array obtained from mem_alloc to new_n elements.
 *
 * m must point just past the two-word header (item size, item count).
 * Existing elements are kept; elements gained by growing are zero-filled.
 * Returns the (possibly moved) element pointer.  Terminates the program if
 * the byte count would overflow size_t or the reallocation fails, instead of
 * dereferencing a NULL result. */
void* mem_extend(void *m, size_t new_n)
{
    const size_t header = sizeof(size_t) * 2;
    size_t *old = (size_t*)m - 2;
    const size_t item_size = old[0];
    size_t *x;

    if (item_size != 0 && new_n > ((size_t)-1 - header) / item_size) {
        fprintf(stderr, "mem_extend: size overflow\n");
        abort();
    }
    x = (size_t*)realloc(old, header + item_size * new_n);
    if (!x) {
        fprintf(stderr, "mem_extend: out of memory\n");
        abort();
    }
    /* x[1] still holds the old count here: clear only the new tail */
    if (new_n > x[1])
        memset((char*)(x + 2) + x[0] * x[1], 0, x[0] * (new_n - x[1]));
    x[1] = new_n;
    return x + 2;
}
/* Zero every element of an array whose two-word header (item size, item
 * count) sits immediately before m; the header itself is left untouched.
 * Declared static so the translation unit always has a definition to link
 * against (a plain C99 `inline` definition provides no external one). */
static inline void _clear(void *m)
{
    size_t *x = (size_t*)m - 2;
    memset(m, 0, x[0] * x[1]);
}
/* Convenience wrappers for the header-prefixed arrays made by mem_alloc.
 *   _new(type, n)   allocate n zeroed elements of `type`
 *   _del(m)         free the array and set m to 0
 *   _len(m)         element count stored in the header
 *   _setsize(m, n)  resize to n elements, updating m
 *   _extend(m)      double the element count, updating m
 * Arguments and results are parenthesised, and _del is a single statement,
 * so the macros are safe inside larger expressions and if/else chains. */
#define _new(type, n) mem_alloc(sizeof(type), (n))
#define _del(m) do { free((size_t*)(m) - 2); (m) = 0; } while (0)
#define _len(m) (*((size_t*)(m) - 1))
#define _setsize(m, n) ((m) = mem_extend((m), (n)))
#define _extend(m) ((m) = mem_extend((m), _len(m) * 2))
/* ----------- LZW stuff -------------- */
typedef uint8_t byte;
typedef uint16_t ushort;

/* Reserved code values; plain byte values use codes 0..255. */
enum {
    M_CLR = 256, /* clear table marker */
    M_EOD = 257, /* end-of-data marker */
    M_NEW = 258  /* new code index */
};

/* Encoder dictionary entry.
   The entry for a code lists, per following byte, the code of the longer
   sequence (0 when there is none yet).  E.g. with code 97 for 'a', 387 for
   'ab' and 1022 for 'abc':
   dict[97].next['b'] = 387, dict[387].next['c'] = 1022, and so on. */
typedef struct {
    ushort next[256];
} lzw_enc_t;

/* Decoder dictionary entry.
   Each entry stores the code of its prefix plus its trailing byte; for the
   example above:
   dict[1022] = { c: 'c', prev: 387 },
   dict[387] = { c: 'b', prev: 97 },
   dict[97] = { c: 'a', prev: 0 }
   "back" is scratch space used to chain entries together temporarily while a
   code is being expanded into bytes. */
typedef struct {
    ushort prev, back;
    byte c;
} lzw_dec_t;
byte* lzw_encode(byte *in, int max_bits)
{
int len = _len(in), bits = 9, next_shift = 512;
ushort code, c, nc, next_code = M_NEW;
lzw_enc_t *d = _new(lzw_enc_t, 512);
if (max_bits > 15) max_bits = 15;
if (max_bits < 9 ) max_bits = 12;
byte *out = _new(ushort, 4);
int out_len = 0, o_bits = 0;
uint32_t tmp = 0;
inline void write_bits(ushort x) {
tmp = (tmp << bits) | x;
o_bits += bits;
if (_len(out) <= out_len) _extend(out);
while (o_bits >= 8) {
o_bits -= 8;
out[out_len++] = tmp >> o_bits;
tmp &= (1 << o_bits) - 1;
}
}
//write_bits(M_CLR);
for (code = *(in++); --len; ) {
c = *(in++);
if ((nc = d[code].next[c]))
code = nc;
else {
write_bits(code);
nc = d[code].next[c] = next_code++;
code = c;
}
/* next new code would be too long for current table */
if (next_code == next_shift) {
/* either reset table back to 9 bits */
if (++bits > max_bits) {
/* table clear marker must occur before bit reset */
write_bits(M_CLR);
bits = 9;
next_shift = 512;
next_code = M_NEW;
_clear(d);
} else /* or extend table */
_setsize(d, next_shift *= 2);
}
}
write_bits(code);
write_bits(M_EOD);
if (tmp) write_bits(tmp);
_del(d);
_setsize(out, out_len);
return out;
}
byte* lzw_decode(byte *in)
{
byte *out = _new(byte, 4);
int out_len = 0;
inline void write_out(byte c)
{
while (out_len >= _len(out)) _extend(out);
out[out_len++] = c;
}
lzw_dec_t *d = _new(lzw_dec_t, 512);
int len, j, next_shift = 512, bits = 9, n_bits = 0;
ushort code, c, t, next_code = M_NEW;
uint32_t tmp = 0;
inline void get_code() {
while(n_bits < bits) {
if (len > 0) {
len --;
tmp = (tmp << 8) | *(in++);
n_bits += 8;
} else {
tmp = tmp << (bits - n_bits);
n_bits = bits;
}
}
n_bits -= bits;
code = tmp >> n_bits;
tmp &= (1 << n_bits) - 1;
}
inline void clear_table() {
_clear(d);
for (j = 0; j < 256; j++) d[j].c = j;
next_code = M_NEW;
next_shift = 512;
bits = 9;
};
clear_table(); /* in case encoded bits didn't start with M_CLR */
for (len = _len(in); len;) {
get_code();
if (code == M_EOD) break;
if (code == M_CLR) {
clear_table();
continue;
}
if (code >= next_code) {
fprintf(stderr, "Bad sequence\n");
_del(out);
goto bail;
}
d[next_code].prev = c = code;
while (c > 255) {
t = d[c].prev; d[t].back = c; c = t;
}
d[next_code - 1].c = c;
while (d[c].back) {
write_out(d[c].c);
t = d[c].back; d[c].back = 0; c = t;
}
write_out(d[c].c);
if (++next_code >= next_shift) {
if (++bits > 16) {
/* if input was correct, we'd have hit M_CLR before this */
fprintf(stderr, "Too many bits\n");
_del(out);
goto bail;
}
_setsize(d, next_shift *= 2);
}
}
/* might be ok, so just whine, don't be drastic */
if (code != M_EOD) fputs("Bits did not end in EOD\n", stderr);
_setsize(out, out_len);
bail: _del(d);
return out;
}
/* Round-trip self test: read unixdict.txt, compress it with 9-bit codes,
 * decompress the result and check that it matches the input.
 * Returns 0 once the comparison has been reported, 1 when the file cannot be
 * read or decoding fails. */
int main()
{
    size_t i, got = 0;
    int fd = open("unixdict.txt", O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "Can't read file\n");
        return 1;
    }

    struct stat st;
    if (fstat(fd, &st) == -1) {
        fprintf(stderr, "Can't read file\n");
        close(fd);
        return 1;
    }

    byte *in = _new(char, st.st_size);
    /* read() may deliver fewer bytes than requested, so loop until done */
    while (got < (size_t)st.st_size) {
        ssize_t n = read(fd, in + got, (size_t)st.st_size - got);
        if (n < 0) {
            fprintf(stderr, "Can't read file\n");
            close(fd);
            _del(in);
            return 1;
        }
        if (n == 0) break; /* file ended earlier than fstat reported */
        got += (size_t)n;
    }
    close(fd);
    _setsize(in, got);

    printf("input size:   %zu\n", _len(in));

    byte *enc = lzw_encode(in, 9);
    printf("encoded size: %zu\n", _len(enc));

    byte *dec = lzw_decode(enc);
    if (!dec) {
        /* lzw_decode has already reported the problem on stderr */
        _del(in);
        _del(enc);
        return 1;
    }
    printf("decoded size: %zu\n", _len(dec));

    /* find the first position where the two buffers differ, if any */
    for (i = 0; i < _len(dec) && i < _len(in); i++)
        if (dec[i] != in[i])
            break;

    if (i < _len(dec) || i < _len(in))
        printf("bad decode at %zu\n", i);
    else
        printf("Decoded ok\n");

    _del(in);
    _del(enc);
    _del(dec);

    return 0;
}
#include <string>
#include <map>
// LZW-compress "uncompressed" into a sequence of integer codes.
// Codes are written through the output iterator "result"; the iterator
// one past the last code written is returned.
template <typename Iterator>
Iterator compress(const std::string &uncompressed, Iterator result) {
  // Seed the table with every single-byte string.
  std::map<std::string,int> codes;
  for (int b = 0; b < 256; ++b)
    codes[std::string(1, b)] = b;
  int nextCode = 256;

  std::string prefix;
  for (std::string::size_type pos = 0; pos < uncompressed.size(); ++pos) {
    const char ch = uncompressed[pos];
    const std::string candidate = prefix + ch;
    if (codes.find(candidate) != codes.end()) {
      // Still a known sequence: keep extending it.
      prefix = candidate;
      continue;
    }
    // Emit the longest known prefix and remember the new sequence.
    *result++ = codes[prefix];
    codes[candidate] = nextCode++;
    prefix = std::string(1, ch);
  }

  // Flush whatever sequence is still pending.
  if (!prefix.empty())
    *result++ = codes[prefix];
  return result;
}
// Decompress a sequence of LZW codes back into a string.
// "begin" and "end" must form a valid range of ints; an empty range yields
// an empty string.
// Throws a string literal (const char*) when the first code is not a
// single-byte code or when a later code refers to an undefined entry.
template <typename Iterator>
std::string decompress(Iterator begin, Iterator end) {
  // Nothing to decode: do not dereference an empty range.
  if (begin == end)
    return std::string();

  // Build the dictionary.
  int dictSize = 256;
  std::map<int,std::string> dictionary;
  for (int i = 0; i < 256; i++)
    dictionary[i] = std::string(1, i);

  // The first code can only name one of the initial single-byte entries.
  const int first = *begin++;
  if (first < 0 || first >= 256)
    throw "Bad compressed k";

  std::string w(1, first);
  std::string result = w;
  std::string entry;
  for ( ; begin != end; begin++) {
    int k = *begin;
    std::map<int,std::string>::const_iterator found = dictionary.find(k);
    if (found != dictionary.end())
      entry = found->second;
    else if (k == dictSize)
      // The code being defined right now: it must be w plus w's first char.
      entry = w + w[0];
    else
      throw "Bad compressed k";

    result += entry;

    // Add w+entry[0] to the dictionary.
    dictionary[dictSize++] = w + entry[0];

    w = entry;
  }
  return result;
}
#include <iostream>
#include <iterator>
#include <vector>
// Demo: compress a sample string, print the codes, then print the result of
// decompressing them again.
int main() {
  std::vector<int> compressed;
  compress("TOBEORNOTTOBEORTOBEORNOT", std::back_inserter(compressed));
  // Print each code followed by ", ". A plain loop avoids relying on an
  // unqualified copy() being found without <algorithm>.
  for (std::vector<int>::const_iterator it = compressed.begin();
       it != compressed.end(); ++it)
    std::cout << *it << ", ";
  std::cout << std::endl;
  std::string decompressed = decompress(compressed.begin(), compressed.end());
  std::cout << decompressed << std::endl;

  return 0;
}
using System;
using System.Collections.Generic;
using System.Text;
namespace LZW
{
    /// <summary>
    /// LZW compression of strings to lists of integer codes, and back.
    /// </summary>
    public class Program
    {
        /// <summary>Compresses a sample string, prints the codes, then prints the decompressed text.</summary>
        public static void Main(string[] args)
        {
            List<int> compressed = Compress("TOBEORNOTTOBEORTOBEORNOT");
            Console.WriteLine(string.Join(", ", compressed));
            string decompressed = Decompress(compressed);
            Console.WriteLine(decompressed);
        }

        /// <summary>Compresses a string to a list of LZW codes.</summary>
        /// <param name="uncompressed">Text to compress; each character is expected to be below 256.</param>
        /// <returns>The codes, in output order; empty for an empty string.</returns>
        public static List<int> Compress(string uncompressed)
        {
            // build the dictionary
            Dictionary<string, int> dictionary = new Dictionary<string, int>();
            for (int i = 0; i < 256; i++)
                dictionary.Add(((char)i).ToString(), i);

            string w = string.Empty;
            List<int> compressed = new List<int>();

            foreach (char c in uncompressed)
            {
                string wc = w + c;
                if (dictionary.ContainsKey(wc))
                {
                    w = wc;
                }
                else
                {
                    // write w to output
                    compressed.Add(dictionary[w]);
                    // wc is a new sequence; add it to the dictionary
                    dictionary.Add(wc, dictionary.Count);
                    w = c.ToString();
                }
            }

            // write remaining output if necessary
            if (!string.IsNullOrEmpty(w))
                compressed.Add(dictionary[w]);

            return compressed;
        }

        /// <summary>Decompresses a list of LZW codes produced by <see cref="Compress"/>.</summary>
        /// <param name="compressed">The codes; the list is read but not modified.</param>
        /// <returns>The decoded text; empty for an empty list.</returns>
        /// <exception cref="ArgumentNullException">The list is null.</exception>
        /// <exception cref="ArgumentException">A code refers to an entry that is not defined.</exception>
        public static string Decompress(List<int> compressed)
        {
            if (compressed == null)
                throw new ArgumentNullException("compressed");
            if (compressed.Count == 0)
                return string.Empty;

            // build the dictionary
            Dictionary<int, string> dictionary = new Dictionary<int, string>();
            for (int i = 0; i < 256; i++)
                dictionary.Add(i, ((char)i).ToString());

            // the first code can only be one of the initial entries
            string w;
            if (!dictionary.TryGetValue(compressed[0], out w))
                throw new ArgumentException("Bad compressed k: " + compressed[0]);
            StringBuilder decompressed = new StringBuilder(w);

            // index loop instead of RemoveAt(0): the caller's list stays intact
            for (int i = 1; i < compressed.Count; i++)
            {
                int k = compressed[i];
                string entry;
                if (!dictionary.TryGetValue(k, out entry))
                {
                    if (k == dictionary.Count)
                        entry = w + w[0]; // the code being defined right now
                    else
                        throw new ArgumentException("Bad compressed k: " + k);
                }

                decompressed.Append(entry);

                // new sequence; add it to the dictionary
                dictionary.Add(dictionary.Count, w + entry[0]);

                w = entry;
            }

            return decompressed.ToString();
        }
    }
}
package main
import (
"fmt"
"log"
"strings"
)
// compress turns a string into a list of LZW output symbols.
// The input is processed byte by byte; an empty string yields a nil slice.
func compress(uncompressed string) []int {
	// Seed the table with all single-byte strings. Strings are used as keys
	// because slices cannot be map keys; string([]byte{...}) avoids the utf8
	// encoding that string(i) would apply for i > 127.
	nextCode := 256
	codes := make(map[string]int, nextCode)
	for b := 0; b < 256; b++ {
		codes[string([]byte{byte(b)})] = b
	}

	var result []int
	prefix := ""
	for pos := 0; pos < len(uncompressed); pos++ {
		cur := uncompressed[pos : pos+1]
		candidate := prefix + cur
		if _, known := codes[candidate]; known {
			// Known sequence: keep growing it.
			prefix = candidate
			continue
		}
		// Emit the longest known prefix and register the new sequence.
		result = append(result, codes[prefix])
		codes[candidate] = nextCode
		nextCode++
		prefix = cur
	}
	if prefix != "" {
		// Output the code for the pending sequence.
		result = append(result, codes[prefix])
	}
	return result
}
// BadSymbolError reports a compressed symbol that cannot be decoded;
// its value is the offending symbol.
type BadSymbolError int

// Error implements the error interface.
func (e BadSymbolError) Error() string {
	return fmt.Sprintf("Bad compressed symbol %d", int(e))
}
// decompress expands a list of LZW output symbols into a string.
// On an undecodable symbol it returns the text decoded so far together with
// a BadSymbolError carrying that symbol.
func decompress(compressed []int) (string, error) {
	// Seed the table with all single-byte strings.
	nextCode := 256
	table := make(map[int]string, nextCode)
	for b := 0; b < 256; b++ {
		table[b] = string([]byte{byte(b)})
	}

	var out strings.Builder
	prev := ""
	for _, code := range compressed {
		var entry string
		known, ok := table[code]
		switch {
		case ok:
			entry = known
		case code == nextCode && prev != "":
			// The symbol being defined right now: prev plus its first byte.
			entry = prev + prev[:1]
		default:
			return out.String(), BadSymbolError(code)
		}
		out.WriteString(entry)

		if prev != "" {
			// Register prev + first byte of entry as the next symbol.
			table[nextCode] = prev + entry[:1]
			nextCode++
		}
		prev = entry
	}
	return out.String(), nil
}
// main compresses a sample string, prints the symbols, then prints the
// decompressed text (or aborts with the decode error).
func main() {
	symbols := compress("TOBEORNOTTOBEORTOBEORNOT")
	fmt.Println(symbols)

	text, err := decompress(symbols)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(text)
}
import java.util.*;
public class LZW {
/** Compress a string to a list of output symbols. */
public static List<Integer> compress(String uncompressed) {
// Build the dictionary.
int dictSize = 256;
Map<String,Integer> dictionary = new HashMap<String,Integer>();
for (int i = 0; i < 256; i++)
dictionary.put("" + (char)i, i);
String w = "";
List<Integer> result = new ArrayList<Integer>();
for (char c : uncompressed.toCharArray()) {
String wc = w + c;
if (dictionary.containsKey(wc))
w = wc;
else {
result.add(dictionary.get(w));
// Add wc to the dictionary.
dictionary.put(wc, dictSize++);
w = "" + c;
}
}
// Output the code for w.
if (!w.equals(""))
result.add(dictionary.get(w));
return result;
}
/** Decompress a list of output ks to a string. */
public static String decompress(List<Integer> compressed) {
// Build the dictionary.
int dictSize = 256;
Map<Integer,String> dictionary = new HashMap<Integer,String>();
for (int i = 0; i < 256; i++)
dictionary.put(i, "" + (char)i);
String w = "" + (char)(int)compressed.remove(0);
StringBuffer result = new StringBuffer(w);
for (int k : compressed) {
String entry;
if (dictionary.containsKey(k))
entry = dictionary.get(k);
else if (k == dictSize)
entry = w + w.charAt(0);
else
throw new IllegalArgumentException("Bad compressed k: " + k);
result.append(entry);
// Add w+entry[0] to the dictionary.
dictionary.put(dictSize++, w + entry.charAt(0));
w = entry;
}
return result.toString();
}
public static void main(String[] args) {
List<Integer> compressed = compress("TOBEORNOTTOBEORTOBEORNOT");
System.out.println(compressed);
String decompressed = decompress(compressed);
System.out.println(decompressed);
}
}
// LZW compression and decompression of strings.
var LZW = {
    // Turn a string into an array of LZW codes.
    compress: function (uncompressed) {
        "use strict";
        var codes = {},
            output = [],
            nextCode = 256,
            prefix = "",
            pos,
            ch,
            candidate;

        // Seed the table with every single-character string.
        for (pos = 0; pos < 256; pos += 1) {
            codes[String.fromCharCode(pos)] = pos;
        }

        for (pos = 0; pos < uncompressed.length; pos += 1) {
            ch = uncompressed.charAt(pos);
            candidate = prefix + ch;
            // hasOwnProperty rather than a plain lookup, so that inherited
            // members such as 'toString' are not mistaken for table entries.
            if (codes.hasOwnProperty(candidate)) {
                prefix = candidate;
                continue;
            }
            output.push(codes[prefix]);
            // Register the new sequence.
            codes[candidate] = nextCode++;
            prefix = String(ch);
        }

        // Emit the code for the sequence still pending.
        if (prefix !== "") {
            output.push(codes[prefix]);
        }
        return output;
    },

    // Turn an array of LZW codes back into a string.
    // Returns null when a code cannot be resolved.
    decompress: function (compressed) {
        "use strict";
        var table = [],
            nextCode = 256,
            idx,
            code,
            previous,
            entry = "",
            text;

        // Seed the table with every single-character string.
        for (idx = 0; idx < 256; idx += 1) {
            table[idx] = String.fromCharCode(idx);
        }

        previous = String.fromCharCode(compressed[0]);
        text = previous;
        for (idx = 1; idx < compressed.length; idx += 1) {
            code = compressed[idx];
            if (table[code]) {
                entry = table[code];
            } else if (code === nextCode) {
                // The code being defined right now.
                entry = previous + previous.charAt(0);
            } else {
                return null;
            }

            text += entry;

            // Register previous + first character of entry.
            table[nextCode++] = previous + entry.charAt(0);

            previous = entry;
        }
        return text;
    }
}, // For Test Purposes
    comp = LZW.compress("TOBEORNOTTOBEORTOBEORNOT"),
    decomp = LZW.decompress(comp);
// Show the codes and the decoded text on separate lines of the page.
document.write(comp + '<br>' + decomp);
// ES6
'use strict';
/**
  Namespace for LZW compression and decompression.

  Methods:
    LZW.compress(uncompressed)
    LZW.decompress(compressed)
*/
class LZW
{
  /**
    Perform the LZW compression.

    uncompressed - String. The string on which to perform the compression.

    Returns an Array of integer codes (empty for an empty string).
  */
  static compress(uncompressed)
  {
    // Initialize dictionary
    const dictionary = {};
    for (let i = 0; i < 256; i++)
    {
      dictionary[String.fromCharCode(i)] = i;
    }

    let word = '';
    const result = [];
    let dictSize = 256;

    for (let i = 0, len = uncompressed.length; i < len; i++)
    {
      const curChar = uncompressed[i];
      const joinedWord = word + curChar;

      // Do not use dictionary[joinedWord] because javascript objects
      // will return values for myObject['toString']
      if (dictionary.hasOwnProperty(joinedWord))
      {
        word = joinedWord;
      }
      else
      {
        result.push(dictionary[word]);
        // Add joinedWord to the dictionary.
        dictionary[joinedWord] = dictSize++;
        word = curChar;
      }
    }

    // Emit the code for the word still pending.
    if (word !== '')
    {
      result.push(dictionary[word]);
    }

    return result;
  }

  /**
    Decompress LZW array generated by LZW.compress()

    compressed - Array. The array that holds LZW compressed data.

    Returns the decoded String.
    Throws the string 'Error in processing' when a code cannot be resolved.
  */
  static decompress(compressed)
  {
    // Initialize Dictionary (inverse of compress)
    const dictionary = {};
    for (let i = 0; i < 256; i++)
    {
      dictionary[i] = String.fromCharCode(i);
    }

    let word = String.fromCharCode(compressed[0]);
    let result = word;
    let entry = '';
    let dictSize = 256;

    for (let i = 1, len = compressed.length; i < len; i++)
    {
      const curNumber = compressed[i];

      if (dictionary[curNumber] !== undefined)
      {
        entry = dictionary[curNumber];
      }
      else if (curNumber === dictSize)
      {
        // The code being defined right now.
        entry = word + word[0];
      }
      else
      {
        throw 'Error in processing';
      }

      result += entry;

      // Add word + entry[0] to dictionary
      dictionary[dictSize++] = word + entry[0];

      word = entry;
    }

    return result;
  }
}
// Demo: log the codes and, on the next line, the decoded text.
let comp = LZW.compress('TOBEORNOTTOBEORTOBEORNOT');
let decomp = LZW.decompress(comp);

console.log(`${comp}\n${decomp}`);
// version 1.1.2
/** LZW compression of strings to lists of integer codes, and back. */
object Lzw {
    /**
     * Compress a string to a list of output symbols.
     *
     * @param uncompressed text to compress; each char is expected to be below 256
     * @return the codes in output order; empty for an empty string
     */
    fun compress(uncompressed: String): MutableList<Int> {
        // Build the dictionary.
        var dictSize = 256
        val dictionary = mutableMapOf<String, Int>()
        (0 until dictSize).forEach { dictionary.put(it.toChar().toString(), it) }

        var w = ""
        val result = mutableListOf<Int>()
        for (c in uncompressed) {
            val wc = w + c
            if (dictionary.containsKey(wc))
                w = wc
            else {
                result.add(dictionary[w]!!)
                // Add wc to the dictionary.
                dictionary.put(wc, dictSize++)
                w = c.toString()
            }
        }

        // Output the code for w
        if (!w.isEmpty()) result.add(dictionary[w]!!)
        return result
    }

    /**
     * Decompress a list of output symbols to a string.
     * The list is only read; the caller's list is left unchanged.
     *
     * @param compressed codes as produced by [compress]
     * @return the decoded text; empty for an empty list
     * @throws IllegalArgumentException if a code refers to an undefined entry
     */
    fun decompress(compressed: List<Int>): String {
        if (compressed.isEmpty()) return ""

        // Build the dictionary.
        var dictSize = 256
        val dictionary = mutableMapOf<Int, String>()
        (0 until dictSize).forEach { dictionary.put(it, it.toChar().toString()) }

        // The first code can only name one of the initial entries.
        val first = compressed[0]
        var w = dictionary[first]
            ?: throw IllegalArgumentException("Bad compressed k: $first")
        val result = StringBuilder(w)
        for (k in compressed.subList(1, compressed.size)) {
            val entry: String
            if (dictionary.containsKey(k))
                entry = dictionary[k]!!
            else if (k == dictSize)
                // the code being defined right now
                entry = w + w[0]
            else
                throw IllegalArgumentException("Bad compressed k: $k")
            result.append(entry)

            // Add w + entry[0] to the dictionary.
            dictionary.put(dictSize++, w + entry[0])
            w = entry
        }
        return result.toString()
    }
}
/** Demo: compress a sample string, print the codes and the decoded text. */
fun main(args: Array<String>) {
    val symbols = Lzw.compress("TOBEORNOTTOBEORTOBEORNOT")
    println(symbols)
    println(Lzw.decompress(symbols))
}
/**
 * LZW compression of strings to comma-separated code lists, and back.
 */
class LZW
{
    /**
     * Compress a string.
     *
     * @param string $unc Text to compress, processed byte by byte.
     * @return string Codes joined with "," (empty string for empty input).
     */
    function compress($unc) {
        $w = "";
        $dictionary = array();
        $result = array();
        $dictSize = 256;
        for ($i = 0; $i < 256; $i += 1) {
            $dictionary[chr($i)] = $i;
        }
        for ($i = 0; $i < strlen($unc); $i++) {
            $c = $unc[$i];
            $wc = $w.$c;
            if (array_key_exists($wc, $dictionary)) {
                $w = $wc;
            } else {
                array_push($result, $dictionary[$w]);
                $dictionary[$wc] = $dictSize++;
                $w = (string)$c;
            }
        }
        if ($w !== "") {
            array_push($result, $dictionary[$w]);
        }
        return implode(",", $result);
    }

    /**
     * Decompress a code list produced by compress().
     *
     * @param string $com Codes joined with ",".
     * @return string|null Decoded text, or null when a code cannot be resolved.
     */
    function decompress($com) {
        // compress() yields "" for empty input; there is nothing to decode
        if ($com === "" || $com === null) {
            return "";
        }
        // explode() yields strings; convert once so the strict comparison
        // with the integer $dictSize below can succeed
        $com = array_map('intval', explode(",", $com));
        $dictionary = array();
        $dictSize = 256;
        for ($i = 0; $i < 256; $i++) {
            $dictionary[$i] = chr($i);
        }
        $w = chr($com[0]);
        $result = $w;
        for ($i = 1; $i < count($com); $i++) {
            $k = $com[$i];
            // isset() rather than a truthiness test: the entry "0" is falsy
            if (isset($dictionary[$k])) {
                $entry = $dictionary[$k];
            } elseif ($k === $dictSize) {
                // the code being defined right now
                $entry = $w.$w[0];
            } else {
                return null;
            }
            $result .= $entry;
            $dictionary[$dictSize++] = $w . $entry[0];
            $w = $entry;
        }
        return $result;
    }
}
// Usage example: print the code list and, on the next line, the decoded text.
$str = 'TOBEORNOTTOBEORTOBEORNOT';
$lzw = new LZW();
$com = $lzw->compress($str);
$dec = $lzw->decompress($com);
echo $com . "\n" . $dec;
def compress(uncompressed):
    """Compress a string to a list of output symbols.

    Each character is expected to have a code point below 256.
    Returns the list of integer codes; an empty string gives an empty list.
    """
    # Build the dictionary of all single-character strings.
    dict_size = 256
    dictionary = {chr(i): i for i in range(dict_size)}

    w = ""
    result = []
    for c in uncompressed:
        wc = w + c
        if wc in dictionary:
            w = wc
        else:
            result.append(dictionary[w])
            # Add wc to the dictionary.
            dictionary[wc] = dict_size
            dict_size += 1
            w = c

    # Output the code for w.
    if w:
        result.append(dictionary[w])
    return result
def decompress(compressed):
    """Decompress a list of output ks to a string.

    `compressed` may be any iterable of integer codes; it is only read, so
    the caller's list is left unchanged. An empty input gives an empty string.
    Raises ValueError when a code refers to an entry that is not defined.
    """
    from io import StringIO

    codes = iter(compressed)
    try:
        first = next(codes)
    except StopIteration:
        return ""

    # Build the dictionary of all single-character strings.
    dict_size = 256
    dictionary = {i: chr(i) for i in range(dict_size)}

    # use StringIO, otherwise this becomes O(N^2)
    # due to string concatenation in a loop
    result = StringIO()
    w = chr(first)
    result.write(w)
    for k in codes:
        if k in dictionary:
            entry = dictionary[k]
        elif k == dict_size:
            # the code being defined right now
            entry = w + w[0]
        else:
            raise ValueError('Bad compressed k: %s' % k)
        result.write(entry)

        # Add w+entry[0] to the dictionary.
        dictionary[dict_size] = w + entry[0]
        dict_size += 1

        w = entry
    return result.getvalue()
# Usage example: print the codes, then the text decoded from them.
compressed = compress('TOBEORNOTTOBEORTOBEORNOT')
print(compressed)
decompressed = decompress(compressed)
print(decompressed)