Skip to content

Using and extending the code

flanglet edited this page Apr 22, 2022 · 24 revisions

Compressing/Decompressing data (C++)

Here is how to compress/decompress a block to/from a file using RLT+TEXT as transform, Huffman as entropy codec, using a block size of 1 MB, 4 jobs and a checksum.

Just create a CompressedOutputStream/CompressedInputStream to write/read compressed data.

Notice that CompressedOutputStream is a std::ostream and CompressedInputStream is a std::istream, so most operations on streams work as usual.

#include <fstream>
#include <iostream>
#include "types.hpp"
#include "InputStream.hpp"
#include "OutputStream.hpp"
#include "io/CompressedInputStream.hpp"
#include "io/CompressedOutputStream.hpp"


// Compresses `length` bytes of `block` into the file "compressed.knz" using
// the RLT+TEXT transform, the HUFFMAN entropy codec, 1 MB blocks, a checksum
// and 4 jobs.
//
// Returns the byte count reported by CompressedOutputStream::getWritten()
// after the stream has been closed.
//
// Both streams have automatic storage duration: they are destroyed in reverse
// order of construction on every exit path, including when an exception
// propagates out of write() or close(), so nothing has to be deleted by hand.
uint64 Compress(byte block[], uint length) {
	// Underlying file stream that receives the compressed bytes
	std::ofstream file("compressed.knz", std::ofstream::out | std::ofstream::binary);

	// Compressing stream layered on top of the file stream
	CompressedOutputStream compressed(file, "HUFFMAN", "RLT+TEXT", 1024 * 1024, true, 4);

	// Compress block
	compressed.write((const char*) block, length);

	// Close the compressed stream before querying the counter
	compressed.close();

	// Number of bytes written
	return compressed.getWritten();
}

// Decompresses up to `length` bytes from the file "compressed.knz" into
// `block`, using 4 jobs.
//
// Returns the byte count reported by CompressedInputStream::getRead() after
// the stream has been closed.
//
// Both streams have automatic storage duration: they are destroyed in reverse
// order of construction on every exit path, including when an exception
// propagates out of read() or close(), so nothing has to be deleted by hand.
uint64 Decompress(byte block[], uint length) {
	// Underlying file stream that supplies the compressed bytes
	std::ifstream file("compressed.knz", std::ifstream::in | std::ifstream::binary);

	// Decompressing stream layered on top of the file stream
	CompressedInputStream compressed(file, 4);

	// Decompress block
	compressed.read((char*) block, length);

	// Close the compressed stream before querying the counter
	compressed.close();

	// Number of bytes read
	return compressed.getRead();
}

Compressing/Decompressing data (C)

Kanzi exposes a C API (see api/libapi.hpp) and can be built as a static (.a) or a dynamic library (.so/.dll).

Below is an example of a C program compressing and decompressing data using the C API.

/* EG. gcc testAPI.c -o r:\testAPI.exe -lkanzi */
/* EG. gcc testAPI.c -o testAPI -lkanzi */

#include "api/libapi.hpp"
#include <stdlib.h>

/*
 * Decompresses /tmp/enwik8.knz into /tmp/enwik8.knz.bak through the C API,
 * one 4 MB buffer at a time, and prints the accumulated input/output sizes.
 *
 * Returns 0 on success, -1 when a file cannot be opened or the buffer cannot
 * be allocated, otherwise the nonzero code returned by initDecompressor() or
 * disposeDecompressor().
 *
 * Every exit goes through the single cleanup section at the bottom, so the
 * files and the buffer are released on error paths too.
 */
int testDecompress()
{
    const int blkSize = 4 * 1024 * 1024;
    struct dData dd = { blkSize, 1 };
    struct dContext* ctx = NULL;
    FILE* in = fopen("/tmp/enwik8.knz", "rb");
    FILE* out = fopen("/tmp/enwik8.knz.bak", "wb");
    BYTE* dst = (BYTE*)malloc(blkSize);
    int r = 0, w = 0, inSize = 0, outSize = 0;
    int res = -1; /* reported when a file or the buffer is missing */

    if ((in != NULL) && (out != NULL) && (dst != NULL)) {
        res = initDecompressor(&dd, in, &ctx);

        if (res == 0) {
            do {
                /* w is in/out: capacity of dst going in, bytes produced coming out */
                w = blkSize;
                res = decompress(ctx, dst, &r, &w);

                /* Stop as soon as nothing was produced or nothing could be written */
                if ((w = (int)fwrite(dst, 1, w, out)) == 0)
                    break;

                inSize += r;
                outSize += w;
            } while (res == 0);

            /*
             * The status of the last decompress() call only ends the loop; it is
             * replaced by the dispose status here.
             * NOTE(review): presumably a nonzero status also marks the end of the
             * stream - confirm against the API before treating it as fatal.
             */
            res = disposeDecompressor(ctx);

            if (res == 0) {
                printf("Size before decompression: %i bytes(s)\n", inSize);
                printf("Size after decompression:  %i bytes(s)\n", outSize);
            }
        }
    }

    /* Single cleanup path */
    if (in != NULL)
        fclose(in);

    if (out != NULL)
        fclose(out);

    free(dst);
    return res;
}

/*
 * Compresses /tmp/enwik8 into /tmp/enwik8.knz through the C API, one 4 MB
 * buffer at a time, and prints the accumulated input/output sizes.
 *
 * Returns 0 on success, -1 when a file cannot be opened or the buffer cannot
 * be allocated, otherwise the first nonzero code returned by initCompressor(),
 * compress() or disposeCompressor().
 *
 * Every exit goes through the single cleanup section at the bottom, so the
 * files and the buffer are released on error paths too.
 */
int testCompress()
{
    const int blkSize = 4 * 1024 * 1024;
    struct cData cd = { "BWT+RANK+MTFT", "FPAQ", blkSize, 1, 4 };
    struct cContext* ctx = NULL;
    FILE* in = fopen("/tmp/enwik8", "rb");
    FILE* out = fopen("/tmp/enwik8.knz", "wb");
    BYTE* src = (BYTE*)malloc(blkSize);
    int r = 0, w = 0, inSize = 0, outSize = 0;
    int disposeRes = 0;
    int res = -1; /* reported when a file or the buffer is missing */

    if ((in != NULL) && (out != NULL) && (src != NULL)) {
        res = initCompressor(&cd, out, &ctx);

        if (res == 0) {
            /* Feed the input file to the compressor until fread() returns nothing */
            while ((r = (int)fread(src, 1, blkSize, in)) != 0) {
                if ((res = compress(ctx, src, &r, &w)) != 0)
                    break;

                inSize += r;
                outSize += w;
            }

            /*
             * Dispose of the compressor even after a failed compress() call so the
             * context is not leaked; the first error is the one reported.
             * NOTE(review): assumes disposeCompressor() is safe to call after a
             * compress() failure - confirm against the API.
             */
            disposeRes = disposeCompressor(ctx, &w);

            if (res == 0)
                res = disposeRes;

            if (res == 0) {
                /* disposeCompressor() reports a final byte count through w */
                outSize += w;
                printf("Size before compression: %i bytes(s)\n", inSize);
                printf("Size after compression:  %i bytes(s)\n", outSize);
            }
        }
    }

    /* Single cleanup path */
    if (in != NULL)
        fclose(in);

    if (out != NULL)
        fclose(out);

    free(src);
    return res;
}

/*
 * Runs the compression test, then the decompression test.
 * The decompression test is skipped when compression fails; the exit code is
 * the first nonzero status, or 0 when both tests succeed.
 */
int main(int argc, const char** argv)
{
    int status = testCompress();

    if (status == 0)
        status = testDecompress();

    return status;
}

Implementing a new transform

Here is how to implement and add a new transform to kanzi.

  • Step 1: write the transform code

For example:

#include "../Context.hpp"
#include "../Transform.hpp"

   // Example transform: XORs every byte of the block with 0xAA.
   // XOR with a constant is its own inverse, so forward() and inverse() share
   // one implementation and differ only in how much output room they require.
   class SuperDuperTransform : public Transform<byte> 
   {
   public:
       SuperDuperTransform() {}

       // Constructor taking the application context (unused by this example)
       SuperDuperTransform(Context&) {}

       ~SuperDuperTransform() {}

       // Transforms 'length' bytes from input into output.
       // Returns false, leaving both slices untouched, when the input does not
       // hold 'length' bytes or the output has less than
       // getMaxEncodedLength(length) bytes of room.
       bool forward(SliceArray<byte>& input, SliceArray<byte>& output, int length) THROW { 
             return xorBlock(input, output, length, getMaxEncodedLength(length));
       }

       // Reverses forward() on 'length' bytes from input into output.
       // Returns false, leaving both slices untouched, when the input does not
       // hold 'length' bytes or the output has less than 'length' bytes of room.
       bool inverse(SliceArray<byte>& input, SliceArray<byte>& output, int length) THROW { 
             return xorBlock(input, output, length, length);
       }

       // The transform is byte for byte, so the output is never larger than the input
       int getMaxEncodedLength(int inputLen) const { return inputLen; }

   private:
       // XORs 'length' bytes starting at the current index of 'input' into
       // 'output' and advances both indexes by 'length'.
       // 'requiredRoom' is the free space the destination must offer.
       static bool xorBlock(SliceArray<byte>& input, SliceArray<byte>& output, int length, int requiredRoom) {
             // Ensure the source really holds 'length' bytes
             if (input._length - input._index < length)
                  return false;

             // Ensure enough room in the destination buffer
             if (output._length - output._index < requiredRoom)
                  return false;

             const byte* src = &input._array[input._index];
             byte* dst = &output._array[output._index];

             for (int i = 0; i < length; i++)
                 dst[i] = src[i] ^ byte(0xAA);

             input._index += length;
             output._index += length;
             return true; 
       }
   };

Always provide a constructor with a Context: the context contains all the application-wide information (such as block size, number of jobs, input & output names, etc.). Always inherit from Transform<T> and respect the number of jobs provided in the context. Implement the forward and inverse methods as well as getMaxEncodedLength(int). Do not write to stdout or stderr. Be aware that your code must be thread-safe.

  • Step 2: Register the transform in transform/TransformFactory.hpp

Add the type, say

 static const uint64 SUPERDUPER_TYPE = 63; 

Let us say you use the name "SUPERDUPER" for the transform. Update the following methods:

 template <class T> uint64 TransformFactory<T>::getTypeToken(const char* tName) THROW
 template <class T> Transform<T>* TransformFactory<T>::newToken(Context& ctx, uint64 functionType) THROW
 template <class T> const char* TransformFactory<T>::getNameToken(uint64 functionType) THROW
  • Step 3: Update the help message in app/Kanzi.cpp

In Kanzi::printHelp, add the SUPERDUPER transform to the list in the -t option section.

  • That is it. For example, run
kanzi -i foo.txt -f -t SUPERDUPER -e none -j 2 -v 4
Clone this wiki locally