View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.index.reader;
20  
21  import java.io.Closeable;
22  import java.io.DataInput;
23  import java.io.DataInputStream;
24  import java.io.EOFException;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.io.UTFDataFormatException;
28  import java.util.Date;
29  import java.util.HashMap;
30  import java.util.Iterator;
31  import java.util.Map;
32  import java.util.NoSuchElementException;
33  import java.util.zip.GZIPInputStream;
34  
/**
 * Maven Index published binary chunk reader, it reads raw Maven Indexer records from the transport binary format.
 * Instances of this class MUST BE handled as resources (have them closed once done with them), it is user
 * responsibility to close them, ideally in try-with-resource block.
 * <p>
 * The reader wraps a single forward-only stream: every iterator obtained from {@link #iterator()} consumes
 * records from that same stream, so a chunk can effectively be iterated only once.
 *
 * @since 5.1.2
 */
public class ChunkReader implements Closeable, Iterable<Map<String, String>> {
    private final String chunkName;

    private final DataInputStream dataInputStream;

    private final int version;

    private final Date timestamp;

    /**
     * Creates the reader and reads the chunk header (one version byte followed by a {@code long} timestamp)
     * from the GZIP compressed input.
     *
     * @param chunkName the chunk name, surrounding whitespace is trimmed
     * @param inputStream the GZIP compressed chunk content
     * @throws IOException if the input is not GZIP data or the header cannot be read; when the header read
     *         fails the wrapped input is closed, as no reader instance exists that the caller could close
     */
    public ChunkReader(final String chunkName, final InputStream inputStream) throws IOException {
        this.chunkName = chunkName.trim();
        final DataInputStream in = new DataInputStream(new GZIPInputStream(inputStream, 2 * 1024));
        final int headerVersion;
        final long headerTimestamp;
        try {
            headerVersion = ((int) in.readByte()) & 0xff;
            headerTimestamp = in.readLong();
        } catch (IOException e) {
            // Construction failed: release the decompressor, otherwise nobody is able to close it.
            try {
                in.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        this.dataInputStream = in;
        this.version = headerVersion;
        this.timestamp = new Date(headerTimestamp);
    }

    /**
     * Returns the chunk name.
     */
    public String getName() {
        return chunkName;
    }

    /**
     * Returns index version. All releases so far always returned {@code 1}.
     */
    public int getVersion() {
        return version;
    }

    /**
     * Returns the index timestamp of last update of the index.
     */
    public Date getTimestamp() {
        return timestamp;
    }

    /**
     * Returns an iterator over the raw records of this chunk, each record being a map of field name to field
     * value. The first record is read eagerly when this method is invoked, and all iterators share the
     * underlying stream of this reader.
     *
     * @throws RuntimeException wrapping the {@link IOException} if a record cannot be read
     */
    @Override
    public Iterator<Map<String, String>> iterator() {
        try {
            return new IndexIterator(dataInputStream);
        } catch (IOException e) {
            throw new RuntimeException("error", e);
        }
    }

    /**
     * Closes this reader and its underlying input.
     */
    @Override
    public void close() throws IOException {
        dataInputStream.close();
    }

    /**
     * Low memory footprint index iterator that incrementally parses the underlying stream. It always holds
     * exactly one pre-read record, {@code null} once the stream is depleted.
     */
    private static class IndexIterator implements Iterator<Map<String, String>> {
        private final DataInputStream dataInputStream;

        private Map<String, String> nextRecord;

        private IndexIterator(final DataInputStream dataInputStream) throws IOException {
            this.dataInputStream = dataInputStream;
            this.nextRecord = nextRecord();
        }

        @Override
        public boolean hasNext() {
            return nextRecord != null;
        }

        @Override
        public Map<String, String> next() {
            if (nextRecord == null) {
                throw new NoSuchElementException("chunk depleted");
            }
            Map<String, String> result = nextRecord;
            nextRecord = nextRecord();
            return result;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("remove");
        }

        /**
         * Reads the next record, converting the checked {@link IOException} into a {@link RuntimeException}
         * as {@link Iterator} methods cannot throw checked exceptions.
         */
        private Map<String, String> nextRecord() {
            try {
                return readRecord(dataInputStream);
            } catch (IOException e) {
                throw new RuntimeException("read error", e);
            }
        }
    }

    /**
     * Reads and returns next record from the underlying stream, or {@code null} if no more records.
     *
     * @throws IOException if the record cannot be read or declares a negative field count
     */
    private static Map<String, String> readRecord(final DataInput dataInput) throws IOException {
        int fieldCount;
        try {
            fieldCount = dataInput.readInt();
        } catch (EOFException ex) {
            return null; // no more documents
        }
        if (fieldCount < 0) {
            // A negative count can only come from corrupt data; fail instead of returning an empty record.
            throw new IOException("Index data content is corrupt: negative field count " + fieldCount);
        }

        Map<String, String> recordMap = new HashMap<>();
        for (int i = 0; i < fieldCount; i++) {
            readField(recordMap, dataInput);
        }
        return recordMap;
    }

    /**
     * Reads one field (flags byte, name, value) and stores it into the given record.
     */
    private static void readField(final Map<String, String> record, final DataInput dataInput) throws IOException {
        dataInput.readByte(); // flags: neglect them
        String name = dataInput.readUTF();
        String value = readUTF(dataInput);
        record.put(name, value);
    }

    /**
     * Reads a string stored as an {@code int} byte length followed by that many bytes in the modified UTF-8
     * encoding of {@link DataInput}. Unlike {@link DataInput#readUTF()}, whose length prefix is an unsigned
     * short, this supports values longer than 65535 bytes.
     *
     * @throws UTFDataFormatException if the bytes are not valid modified UTF-8
     * @throws IOException if the declared length is negative or too large to allocate, or on read failure
     */
    private static String readUTF(final DataInput dataInput) throws IOException {
        int utflen = dataInput.readInt();
        if (utflen < 0) {
            // Would otherwise surface as an unchecked NegativeArraySizeException.
            throw new IOException("Index data content is corrupt: negative string length " + utflen);
        }

        byte[] bytearr;
        char[] chararr;

        try {
            bytearr = new byte[utflen];
            chararr = new char[utflen];
        } catch (OutOfMemoryError e) {
            throw new IOException("Index data content is corrupt", e);
        }

        int c, char2, char3;
        int count = 0;
        int chararrCount = 0;

        dataInput.readFully(bytearr, 0, utflen);

        // Fast path: copy the leading run of single byte (ASCII) characters.
        while (count < utflen) {
            c = bytearr[count] & 0xff;
            if (c > 127) {
                break;
            }
            count++;
            chararr[chararrCount++] = (char) c;
        }

        // General path: decode the remainder, dispatching on the high nibble of the lead byte.
        while (count < utflen) {
            c = bytearr[count] & 0xff;
            switch (c >> 4) {
                case 0:
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                case 7:
                    /* 0xxxxxxx */
                    count++;
                    chararr[chararrCount++] = (char) c;
                    break;

                case 12:
                case 13:
                    /* 110x xxxx 10xx xxxx */
                    count += 2;
                    if (count > utflen) {
                        throw new UTFDataFormatException("malformed input: partial character at end");
                    }
                    char2 = bytearr[count - 1];
                    if ((char2 & 0xC0) != 0x80) {
                        throw new UTFDataFormatException("malformed input around byte " + count);
                    }
                    chararr[chararrCount++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F));
                    break;

                case 14:
                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
                    count += 3;
                    if (count > utflen) {
                        throw new UTFDataFormatException("malformed input: partial character at end");
                    }
                    char2 = bytearr[count - 2];
                    char3 = bytearr[count - 1];
                    if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
                        throw new UTFDataFormatException("malformed input around byte " + (count - 1));
                    }
                    chararr[chararrCount++] = (char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | (char3 & 0x3F));
                    break;

                default:
                    /* 10xx xxxx, 1111 xxxx */
                    throw new UTFDataFormatException("malformed input around byte " + count);
            }
        }

        // The number of chars produced may be less than utflen
        return new String(chararr, 0, chararrCount);
    }
}