ChunkWriter

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.maven.index.reader;

import java.io.Closeable;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.zip.GZIPOutputStream;

/**
 * Maven Index published binary chunk writer: it writes raw Maven Indexer records to the transport binary format.
 * Instances of this class MUST BE handled as resources (closed once done with them); it is the user's
 * responsibility to close them, ideally in a try-with-resources block.
 *
 * @since 5.1.2
 */
public class ChunkWriter implements Closeable {
    /** Flag bit set on every field except {@code "i"} and {@code "m"}. */
    private static final int F_INDEXED = 1;

    /** Flag bit set on every field except {@code "i"}, {@code "m"}, {@code "1"} and {@code "px"}. */
    private static final int F_TOKENIZED = 2;

    /** Flag bit set on every written field. */
    private static final int F_STORED = 4;

    private final String chunkName;

    private final DataOutputStream dataOutputStream;

    private final int version;

    /** Private copy of the timestamp passed to the constructor; {@code null} when none was given. */
    private final Date timestamp;

    /**
     * Creates a writer that GZIP-compresses everything it writes to the given stream, and immediately writes the
     * chunk header: one byte holding the version, followed by a long holding the timestamp in milliseconds
     * ({@code -1} when {@code timestamp} is {@code null}).
     *
     * @param chunkName the chunk name, stored with leading and trailing whitespace trimmed
     * @param outputStream the stream that receives the compressed chunk
     * @param version the index version; only its low eight bits are written to the header
     * @param timestamp the index timestamp, may be {@code null}; it is copied, so later changes to the passed
     *                  instance do not affect this writer
     * @throws IOException if writing the header fails
     */
    public ChunkWriter(final String chunkName, final OutputStream outputStream, final int version, final Date timestamp)
            throws IOException {
        this.chunkName = chunkName.trim();
        this.dataOutputStream = new DataOutputStream(new GZIPOutputStream(outputStream, 2 * 1024));
        this.version = version;
        // Date is mutable: keep a private copy so getTimestamp() always agrees with the header written below.
        this.timestamp = timestamp == null ? null : new Date(timestamp.getTime());

        dataOutputStream.writeByte(version);
        dataOutputStream.writeLong(this.timestamp == null ? -1 : this.timestamp.getTime());
    }

    /**
     * Returns the chunk name.
     */
    public String getName() {
        return chunkName;
    }

    /**
     * Returns index version. All releases so far always returned {@code 1}.
     */
    public int getVersion() {
        return version;
    }

    /**
     * Returns the index timestamp of last update of the index, or {@code null} if none was given. The returned
     * instance is a copy; modifying it does not affect this writer.
     */
    public Date getTimestamp() {
        return timestamp == null ? null : new Date(timestamp.getTime());
    }

    /**
     * Writes out every record the iterator yields and returns the written record count.
     *
     * @param iterator source of records, each a map of field name to field value
     * @return the number of records written
     * @throws IOException if writing a record fails
     */
    public int writeChunk(final Iterator<Map<String, String>> iterator) throws IOException {
        int written = 0;
        while (iterator.hasNext()) {
            writeRecord(iterator.next(), dataOutputStream);
            written++;
        }
        return written;
    }

    /**
     * Closes this writer and its underlying output stream.
     */
    @Override
    public void close() throws IOException {
        dataOutputStream.close();
    }

    /**
     * Writes one record: the field count as an int, followed by each field in the map's iteration order.
     */
    private static void writeRecord(final Map<String, String> record, final DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(record.size());
        for (Map.Entry<String, String> field : record.entrySet()) {
            writeField(field.getKey(), field.getValue(), dataOutput);
        }
    }

    /**
     * Writes one field: a flags byte, the field name via {@link DataOutput#writeUTF(String)}, and the value with a
     * four-byte length prefix.
     */
    private static void writeField(final String fieldName, final String fieldValue, final DataOutput dataOutput)
            throws IOException {
        final boolean storedOnly = fieldName.equals("i") || fieldName.equals("m");
        final boolean untokenized = storedOnly || fieldName.equals("1") || fieldName.equals("px");

        int flags = F_STORED;
        if (!storedOnly) {
            flags |= F_INDEXED;
        }
        if (!untokenized) {
            flags |= F_TOKENIZED;
        }

        dataOutput.writeByte(flags);
        dataOutput.writeUTF(fieldName);
        writeUTF(fieldValue, dataOutput);
    }

    /**
     * Writes a string as an int byte-count followed by that many encoded bytes. Each {@code char} is encoded on its
     * own as one, two or three bytes; {@code U+0000} takes the two-byte form. Unlike
     * {@link DataOutput#writeUTF(String)}, the length prefix is a four-byte int rather than a two-byte short.
     */
    private static void writeUTF(final String str, final DataOutput dataOutput) throws IOException {
        final int charCount = str.length();

        int byteCount = 0;
        for (int i = 0; i < charCount; i++) {
            byteCount += encodedLength(str.charAt(i));
        }
        dataOutput.writeInt(byteCount);

        final byte[] encoded = new byte[byteCount];
        int pos = 0;
        for (int i = 0; i < charCount; i++) {
            final int c = str.charAt(i);
            if (c >= 0x0001 && c <= 0x007F) {
                encoded[pos++] = (byte) c;
            } else if (c > 0x07FF) {
                encoded[pos++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                encoded[pos++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                encoded[pos++] = (byte) (0x80 | (c & 0x3F));
            } else {
                // U+0000 and U+0080..U+07FF
                encoded[pos++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
                encoded[pos++] = (byte) (0x80 | (c & 0x3F));
            }
        }
        dataOutput.write(encoded, 0, byteCount);
    }

    /**
     * Returns how many bytes {@link #writeUTF(String, DataOutput)} emits for the given char value.
     */
    private static int encodedLength(final int c) {
        if (c >= 0x0001 && c <= 0x007F) {
            return 1;
        }
        return c > 0x07FF ? 3 : 2;
    }
}