View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.index.reader;
20  
21  import java.io.Closeable;
22  import java.io.DataOutput;
23  import java.io.DataOutputStream;
24  import java.io.IOException;
25  import java.io.OutputStream;
26  import java.util.Date;
27  import java.util.Iterator;
28  import java.util.Map;
29  import java.util.zip.GZIPOutputStream;
30  
/**
 * Maven Index published binary chunk writer: it writes raw Maven Indexer records to the transport binary format.
 * <p>
 * The chunk is GZIP compressed. It starts with a header made of the version (one byte) and the timestamp
 * (a {@code long}, {@code -1} when no timestamp was given), followed by the records written using
 * {@link #writeChunk(Iterator)}.
 * <p>
 * Instances of this class MUST BE handled as resources (have them closed once done with them). It is the user's
 * responsibility to close them, ideally in a try-with-resources block.
 *
 * @since 5.1.2
 */
public class ChunkWriter implements Closeable {
    /** Flag bit set for fields that are indexed. */
    private static final int F_INDEXED = 1;

    /** Flag bit set for fields that are tokenized. */
    private static final int F_TOKENIZED = 2;

    /** Flag bit set for fields that are stored; this writer sets it on every field. */
    private static final int F_STORED = 4;

    private final String chunkName;

    private final DataOutputStream dataOutputStream;

    private final int version;

    /** Private copy of the timestamp passed to the constructor, or {@code null} if none was given. */
    private final Date timestamp;

    /**
     * Creates the writer and immediately writes the chunk header (version and timestamp) to the compressed stream.
     *
     * @param chunkName the chunk name, must not be {@code null}; surrounding whitespace is trimmed
     * @param outputStream the stream receiving the GZIP compressed chunk; it is closed by {@link #close()}
     * @param version the index version; only its low eight bits are written to the header
     * @param timestamp the index timestamp, may be {@code null} (written as {@code -1})
     * @throws IOException if the GZIP stream cannot be created or the header cannot be written
     */
    public ChunkWriter(final String chunkName, final OutputStream outputStream, final int version, final Date timestamp)
            throws IOException {
        this.chunkName = chunkName.trim();
        this.dataOutputStream = new DataOutputStream(new GZIPOutputStream(outputStream, 2 * 1024));
        this.version = version;
        // Date is mutable: keep a private copy so that later changes made by the caller cannot make
        // getTimestamp() disagree with the value written to the header below.
        this.timestamp = timestamp == null ? null : new Date(timestamp.getTime());

        dataOutputStream.writeByte(version);
        dataOutputStream.writeLong(this.timestamp == null ? -1 : this.timestamp.getTime());
    }

    /**
     * Returns the chunk name.
     */
    public String getName() {
        return chunkName;
    }

    /**
     * Returns index version. All releases so far always returned {@code 1}.
     */
    public int getVersion() {
        return version;
    }

    /**
     * Returns the index timestamp of last update of the index, or {@code null} if none was given. The returned
     * instance is a copy, modifying it does not affect this writer.
     */
    public Date getTimestamp() {
        return timestamp == null ? null : new Date(timestamp.getTime());
    }

    /**
     * Writes out the record iterator and returns the written record count.
     *
     * @param iterator the records to write, each record being a map of field name to field value
     * @return the number of records written
     * @throws IOException if writing to the underlying stream fails
     * @throws NullPointerException if a record, a field name or a field value is {@code null}; nothing of the
     *         offending record is written in that case
     */
    public int writeChunk(final Iterator<Map<String, String>> iterator) throws IOException {
        int written = 0;
        while (iterator.hasNext()) {
            writeRecord(iterator.next(), dataOutputStream);
            written++;
        }
        return written;
    }

    /**
     * Closes this writer and its underlying output.
     */
    @Override
    public void close() throws IOException {
        dataOutputStream.close();
    }

    /**
     * Writes one record: the field count followed by every field of the record.
     */
    private static void writeRecord(final Map<String, String> record, final DataOutput dataOutput) throws IOException {
        // Validate the whole record before emitting anything: failing half way through would leave the field
        // count and some fields already written, i.e. a truncated record in the stream.
        for (Map.Entry<String, String> entry : record.entrySet()) {
            if (entry.getKey() == null) {
                throw new NullPointerException("Record contains a null field name");
            }
            if (entry.getValue() == null) {
                throw new NullPointerException("Record field '" + entry.getKey() + "' has a null value");
            }
        }
        dataOutput.writeInt(record.size());
        for (Map.Entry<String, String> entry : record.entrySet()) {
            writeField(entry.getKey(), entry.getValue(), dataOutput);
        }
    }

    /**
     * Writes one field: a flags byte, the field name (via {@link DataOutput#writeUTF(String)}) and the field
     * value (via {@link #writeUTF(String, DataOutput)}).
     * <p>
     * Every field is flagged as stored. Fields {@code "i"} and {@code "m"} are neither indexed nor tokenized,
     * fields {@code "1"} and {@code "px"} are indexed but not tokenized, all other fields are indexed and
     * tokenized.
     */
    private static void writeField(final String fieldName, final String fieldValue, final DataOutput dataOutput)
            throws IOException {
        boolean isIndexed = !(fieldName.equals("i") || fieldName.equals("m"));
        boolean isTokenized =
                !(fieldName.equals("i") || fieldName.equals("m") || fieldName.equals("1") || fieldName.equals("px"));
        int flags = (isIndexed ? F_INDEXED : 0) + (isTokenized ? F_TOKENIZED : 0) + F_STORED;
        dataOutput.writeByte(flags);
        dataOutput.writeUTF(fieldName);
        writeUTF(fieldValue, dataOutput);
    }

    /**
     * Writes a string as an {@code int} byte count followed by that many encoded bytes. Each {@code char} in the
     * range {@code 0x0001-0x007F} takes one byte, chars above {@code 0x07FF} take three bytes and all remaining
     * chars (including {@code 0x0000}) take two bytes.
     * <p>
     * Unlike {@link DataOutput#writeUTF(String)}, the length prefix is four bytes wide instead of two.
     */
    private static void writeUTF(final String str, final DataOutput dataOutput) throws IOException {
        final int strlen = str.length();

        // First pass: compute the encoded size, it has to be written before the payload.
        int utflen = 0;
        for (int i = 0; i < strlen; i++) {
            final int c = str.charAt(i);
            if ((c >= 0x0001) && (c <= 0x007F)) {
                utflen++;
            } else if (c > 0x07FF) {
                utflen += 3;
            } else {
                utflen += 2;
            }
        }
        dataOutput.writeInt(utflen);

        // Second pass: encode into a buffer of exactly the computed size.
        final byte[] bytearr = new byte[utflen];
        int count = 0;
        for (int i = 0; i < strlen; i++) {
            final int c = str.charAt(i);
            if ((c >= 0x0001) && (c <= 0x007F)) {
                bytearr[count++] = (byte) c;
            } else if (c > 0x07FF) {
                bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                bytearr[count++] = (byte) (0x80 | (c & 0x3F));
            } else {
                bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
                bytearr[count++] = (byte) (0x80 | (c & 0x3F));
            }
        }
        dataOutput.write(bytearr, 0, utflen);
    }
}
165 }