001/*
002 * To change this template, choose Tools | Templates
003 * and open the template in the editor.
004 */
005package org.anarres.parallelgzip;
006
007import java.io.ByteArrayOutputStream;
008import java.io.FilterOutputStream;
009import java.io.IOException;
010import java.io.InterruptedIOException;
011import java.io.OutputStream;
012import java.nio.ByteBuffer;
013import java.nio.ByteOrder;
014import java.util.concurrent.ArrayBlockingQueue;
015import java.util.concurrent.BlockingQueue;
016import java.util.concurrent.Callable;
017import java.util.concurrent.ExecutionException;
018import java.util.concurrent.ExecutorService;
019import java.util.concurrent.Future;
020import java.util.concurrent.ThreadPoolExecutor;
021import java.util.zip.CRC32;
022import java.util.zip.Deflater;
023import java.util.zip.DeflaterOutputStream;
024import java.util.zip.GZIPOutputStream;
025import javax.annotation.CheckForNull;
026import javax.annotation.Nonnegative;
027import javax.annotation.Nonnull;
028
029/**
030 * A multi-threaded version of {@link GZIPOutputStream}.
031 *
032 * @author shevek
033 */
034public class ParallelGZIPOutputStream extends FilterOutputStream {
035
036    private static final int GZIP_MAGIC = 0x8b1f;
037    private static final int SIZE = 64 * 1024;
038
039    @Nonnull
040    private static Deflater newDeflater() {
041        return new Deflater(Deflater.DEFAULT_COMPRESSION, true);
042    }
043
044    @Nonnull
045    private static DeflaterOutputStream newDeflaterOutputStream(@Nonnull OutputStream out, @Nonnull Deflater deflater) {
046        return new DeflaterOutputStream(out, deflater, 512, true);
047    }
048
049    /* Allow write into byte[] directly */
050    private static class ByteArrayOutputStreamExposed extends ByteArrayOutputStream {
051
052        public ByteArrayOutputStreamExposed(int size) {
053            super(size);
054        }
055
056        public void writeTo(@Nonnull byte[] buf) throws IOException {
057            System.arraycopy(this.buf, 0, buf, 0, count);
058        }
059    }
060
061    private static class State {
062
063        private final Deflater def = newDeflater();
064        private final ByteArrayOutputStreamExposed buf = new ByteArrayOutputStreamExposed(SIZE + (SIZE >> 3));
065        private final DeflaterOutputStream str = newDeflaterOutputStream(buf, def);
066    }
067
068    /** This ThreadLocal avoids the recycling of a lot of memory, causing lumpy performance. */
069    private static final ThreadLocal<State> STATE = new ThreadLocal<State>() {
070        @Override
071        protected State initialValue() {
072            return new State();
073        }
074    };
075
076    private static class Block implements Callable<Block> {
077
078        // private final int index;
079        private byte[] buf = new byte[SIZE + (SIZE >> 3)];
080        private int buf_length = 0;
081
082        /*
083         public Block(@Nonnegative int index) {
084         this.index = index;
085         }
086         */
087        // Only on worker thread
088        @Override
089        public Block call() throws IOException {
090            // LOG.info("Processing " + this + " on " + Thread.currentThread());
091
092            State state = STATE.get();
093            // ByteArrayOutputStream buf = new ByteArrayOutputStream(in.length);   // Overestimate output size required.
094            // DeflaterOutputStream def = newDeflaterOutputStream(buf);
095            state.def.reset();
096            state.buf.reset();
097            state.str.write(buf, 0, buf_length);
098            state.str.flush();
099
100            // int in_length = buf_length;
101            int out_length = state.buf.size();
102            if (out_length > buf.length)
103                this.buf = new byte[out_length];
104            // System.out.println("Compressed " + in_length + " to " + out_length + " bytes.");
105            this.buf_length = out_length;
106            state.buf.writeTo(buf);
107
108            // return Arrays.copyOf(in, in_length);
109            return this;
110        }
111
112        @Override
113        public String toString() {
114            return "Block" /* + index */ + "(" + buf_length + "/" + buf.length + " bytes)";
115        }
116    }
117
118    @Nonnegative
119    private static int getThreadCount(@Nonnull ExecutorService executor) {
120        if (executor instanceof ThreadPoolExecutor)
121            return ((ThreadPoolExecutor) executor).getMaximumPoolSize();
122        return Runtime.getRuntime().availableProcessors();
123    }
124
125    // TODO: Share, daemonize.
126    private final ExecutorService executor;
127    private final CRC32 crc = new CRC32();
128    private final int emitQueueSize;
129    private final BlockingQueue<Future<Block>> emitQueue;
130    @Nonnull
131    private Block block = new Block();
132    @CheckForNull
133    private Block freeBlock = null;
134    /** Used as a sentinel for 'closed'. */
135    private long bytesWritten = 0;
136
137    // Master thread only
138    @Deprecated // Doesn't really use the given number of threads.
139    public ParallelGZIPOutputStream(@Nonnull OutputStream out, @Nonnull ExecutorService executor, @Nonnegative int nthreads) throws IOException {
140        super(out);
141        this.executor = executor;
142        // Some blocks compress faster than others; allow a long enough queue to keep all CPUs busy at least for a bit.
143        this.emitQueueSize = nthreads * 3;
144        this.emitQueue = new ArrayBlockingQueue<Future<Block>>(emitQueueSize);
145        writeHeader();
146    }
147
148    /**
149     * Creates a ParallelGZIPOutputStream
150     * using {@link ParallelGZIPEnvironment#getSharedThreadPool()}.
151     *
152     * @param out the eventual output stream for the compressed data.
153     * @throws IOException if it all goes wrong.
154     */
155    @Deprecated // Doesn't really use the given number of threads.
156    public ParallelGZIPOutputStream(@Nonnull OutputStream out, @Nonnegative int nthreads) throws IOException {
157        this(out, ParallelGZIPEnvironment.getSharedThreadPool(), nthreads);
158    }
159
160    public ParallelGZIPOutputStream(@Nonnull OutputStream out, @Nonnull ExecutorService executor) throws IOException {
161        this(out, executor, getThreadCount(executor));
162    }
163
164    /**
165     * Creates a ParallelGZIPOutputStream
166     * using {@link ParallelGZIPEnvironment#getSharedThreadPool()}
167     * and {@link Runtime#availableProcessors()}.
168     *
169     * @param out the eventual output stream for the compressed data.
170     * @throws IOException if it all goes wrong.
171     */
172    public ParallelGZIPOutputStream(@Nonnull OutputStream out) throws IOException {
173        this(out, Runtime.getRuntime().availableProcessors());
174    }
175
176    /*
177     * @see http://www.gzip.org/zlib/rfc-gzip.html#file-format
178     */
179    private void writeHeader() throws IOException {
180        out.write(new byte[]{
181            (byte) GZIP_MAGIC, // ID1: Magic number (little-endian short)
182            (byte) (GZIP_MAGIC >> 8), // ID2: Magic number (little-endian short)
183            Deflater.DEFLATED, // CM: Compression method
184            0, // FLG: Flags (byte)
185            0, 0, 0, 0, // MTIME: Modification time (int)
186            0, // XFL: Extra flags
187            3 // OS: Operating system (3 = Linux)
188        });
189    }
190
191    // Master thread only
192    @Override
193    public void write(int b) throws IOException {
194        byte[] single = new byte[1];
195        single[0] = (byte) (b & 0xFF);
196        write(single);
197    }
198
199    // Master thread only
200    @Override
201    public void write(byte[] b) throws IOException {
202        write(b, 0, b.length);
203    }
204
205    // Master thread only
206    @Override
207    public void write(byte[] b, int off, int len) throws IOException {
208        crc.update(b, off, len);
209        bytesWritten += len;
210
211        while (len > 0) {
212            final byte[] blockBuf = block.buf;
213            // assert block.in_length < block.in.length
214            int capacity = SIZE - block.buf_length; // Make sure we don't grow the block buf repeatedly.
215            if (len >= capacity) {
216                System.arraycopy(b, off, blockBuf, block.buf_length, capacity);
217                block.buf_length += capacity;   // == block.in.length
218                off += capacity;
219                len -= capacity;
220                submit();
221            } else {
222                System.arraycopy(b, off, blockBuf, block.buf_length, len);
223                block.buf_length += len;
224                // off += len;
225                // len = 0;
226                break;
227            }
228        }
229    }
230
231    // Master thread only
232    private void submit() throws IOException {
233        emitUntil(emitQueueSize - 1);
234        emitQueue.add(executor.submit(block));
235        Block b = freeBlock;
236        if (b != null)
237            freeBlock = null;
238        else
239            b = new Block();
240        block = b;
241    }
242
243    // Emit If Available - submit always
244    // Emit At Least one - submit when executor is full
245    // Emit All Remaining - flush(), close()
246    // Master thread only
247    private void tryEmit() throws IOException, InterruptedException, ExecutionException {
248        for (;;) {
249            Future<Block> future = emitQueue.peek();
250            // LOG.info("Peeked future " + future);
251            if (future == null)
252                return;
253            if (!future.isDone())
254                return;
255            // It's an ordered queue. This MUST be the same element as above.
256            Block b = emitQueue.remove().get();
257            // System.out.println("Chance-emitting block " + b);
258            out.write(b.buf, 0, b.buf_length);
259            b.buf_length = 0;
260            freeBlock = b;
261        }
262    }
263
264    // Master thread only
265    /** Emits any opportunistically available blocks. Furthermore, emits blocks until the number of executing tasks is less than taskCountAllowed. */
266    private void emitUntil(@Nonnegative int taskCountAllowed) throws IOException {
267        try {
268            while (emitQueue.size() > taskCountAllowed) {
269                // LOG.info("Waiting for taskCount=" + emitQueue.size() + " -> " + taskCountAllowed);
270                Block b = emitQueue.remove().get();  // Valid because emitQueue.size() > 0
271                // System.out.println("Force-emitting block " + b);
272                out.write(b.buf, 0, b.buf_length);  // Blocks until this task is done.
273                b.buf_length = 0;
274                freeBlock = b;
275            }
276            // We may have achieved more opportunistically available blocks
277            // while waiting for a block above. Let's emit them here.
278            tryEmit();
279        } catch (ExecutionException e) {
280            throw new IOException(e);
281        } catch (InterruptedException e) {
282            throw new InterruptedIOException();
283        }
284    }
285
286    // Master thread only
287    @Override
288    public void flush() throws IOException {
289        // LOG.info("Flush: " + block);
290        if (block.buf_length > 0)
291            submit();
292        emitUntil(0);
293        super.flush();
294    }
295
296    // Master thread only
297    @Override
298    public void close() throws IOException {
299        // LOG.info("Closing: bytesWritten=" + bytesWritten);
300        if (bytesWritten >= 0) {
301            flush();
302
303            newDeflaterOutputStream(out, newDeflater()).finish();
304
305            ByteBuffer buf = ByteBuffer.allocate(8);
306            buf.order(ByteOrder.LITTLE_ENDIAN);
307            // LOG.info("CRC is " + crc.getValue());
308            buf.putInt((int) crc.getValue());
309            buf.putInt((int) (bytesWritten % 4294967296L));
310            out.write(buf.array()); // allocate() guarantees a backing array.
311            // LOG.info("trailer is " + Arrays.toString(buf.array()));
312
313            out.flush();
314            out.close();
315
316            bytesWritten = Integer.MIN_VALUE;
317            // } else {
318            // LOG.warn("Already closed.");
319
320            freeBlock = null;
321        }
322    }
323}