View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.index.updater;
20  
21  import java.io.BufferedOutputStream;
22  import java.io.DataOutput;
23  import java.io.DataOutputStream;
24  import java.io.IOException;
25  import java.io.OutputStream;
26  import java.util.ArrayList;
27  import java.util.Date;
28  import java.util.List;
29  import java.util.zip.GZIPOutputStream;
30  
31  import org.apache.lucene.document.Document;
32  import org.apache.lucene.document.Field;
33  import org.apache.lucene.document.StoredField;
34  import org.apache.lucene.index.IndexOptions;
35  import org.apache.lucene.index.IndexReader;
36  import org.apache.lucene.index.IndexableField;
37  import org.apache.lucene.index.MultiBits;
38  import org.apache.lucene.util.Bits;
39  import org.apache.maven.index.ArtifactInfo;
40  import org.apache.maven.index.IndexerField;
41  import org.apache.maven.index.context.DefaultIndexingContext;
42  import org.apache.maven.index.context.IndexingContext;
43  
44  /**
45   * An index data writer used to write transfer index format.
46   *
47   * @author Eugene Kuleshov
48   */
49  public class IndexDataWriter {
50      static final int VERSION = 1;
51  
52      static final int F_INDEXED = 1;
53  
54      static final int F_TOKENIZED = 2;
55  
56      static final int F_STORED = 4;
57  
58      static final int F_COMPRESSED = 8;
59  
60      private final DataOutputStream dos;
61  
62      private final GZIPOutputStream gos;
63  
64      private final BufferedOutputStream bos;
65  
66      private boolean descriptorWritten;
67  
68      public IndexDataWriter(OutputStream os) throws IOException {
69          bos = new BufferedOutputStream(os, 1024 * 8);
70          gos = new GZIPOutputStream(bos, 1024 * 2);
71          dos = new DataOutputStream(gos);
72  
73          this.descriptorWritten = false;
74      }
75  
76      public int write(IndexingContext context, IndexReader indexReader, List<Integer> docIndexes) throws IOException {
77          writeHeader(context);
78  
79          int n = writeDocuments(indexReader, docIndexes);
80  
81          writeGroupFields(context);
82  
83          close();
84  
85          return n;
86      }
87  
88      public void close() throws IOException {
89          dos.flush();
90  
91          gos.flush();
92          gos.finish();
93  
94          bos.flush();
95      }
96  
97      public void writeHeader(IndexingContext context) throws IOException {
98          dos.writeByte(VERSION);
99  
100         Date timestamp = context.getTimestamp();
101         dos.writeLong(timestamp == null ? -1 : timestamp.getTime());
102     }
103 
104     public void writeGroupFields(IndexingContext context) throws IOException {
105         {
106             List<IndexableField> allGroupsFields = new ArrayList<>(2);
107             allGroupsFields.add(
108                     new Field(ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, IndexerField.KEYWORD_STORED));
109             allGroupsFields.add(new StoredField(
110                     ArtifactInfo.ALL_GROUPS_LIST,
111                     ArtifactInfo.lst2str(context.getAllGroups()),
112                     IndexerField.KEYWORD_STORED));
113             writeDocumentFields(allGroupsFields);
114         }
115 
116         {
117             List<IndexableField> rootGroupsFields = new ArrayList<>(2);
118             rootGroupsFields.add(
119                     new Field(ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE, IndexerField.KEYWORD_STORED));
120             rootGroupsFields.add(new StoredField(
121                     ArtifactInfo.ROOT_GROUPS_LIST,
122                     ArtifactInfo.lst2str(context.getRootGroups()),
123                     IndexerField.KEYWORD_STORED));
124             writeDocumentFields(rootGroupsFields);
125         }
126     }
127 
128     public int writeDocuments(IndexReader r, List<Integer> docIndexes) throws IOException {
129         int n = 0;
130         Bits liveDocs = MultiBits.getLiveDocs(r);
131 
132         if (docIndexes == null) {
133             for (int i = 0; i < r.maxDoc(); i++) {
134                 if (liveDocs == null || liveDocs.get(i)) {
135                     if (writeDocument(r.document(i))) {
136                         n++;
137                     }
138                 }
139             }
140         } else {
141             for (int i : docIndexes) {
142                 if (liveDocs == null || liveDocs.get(i)) {
143                     if (writeDocument(r.document(i))) {
144                         n++;
145                     }
146                 }
147             }
148         }
149 
150         return n;
151     }
152 
153     public boolean writeDocument(final Document document) throws IOException {
154         List<IndexableField> fields = document.getFields();
155 
156         List<IndexableField> storedFields = new ArrayList<>(fields.size());
157 
158         for (IndexableField field : fields) {
159             if (DefaultIndexingContext.FLD_DESCRIPTOR.equals(field.name())) {
160                 if (descriptorWritten) {
161                     return false;
162                 } else {
163                     descriptorWritten = true;
164                 }
165             }
166 
167             if (field.fieldType().stored()) {
168                 storedFields.add(field);
169             }
170         }
171 
172         writeDocumentFields(storedFields);
173 
174         return true;
175     }
176 
177     public void writeDocumentFields(List<IndexableField> fields) throws IOException {
178         dos.writeInt(fields.size());
179 
180         for (IndexableField field : fields) {
181             writeField(field);
182         }
183     }
184 
185     public void writeField(IndexableField field) throws IOException {
186         int flags = (field.fieldType().indexOptions() != IndexOptions.NONE ? F_INDEXED : 0) //
187                 + (field.fieldType().tokenized() ? F_TOKENIZED : 0) //
188                 + (field.fieldType().stored() ? F_STORED : 0); //
189         // + ( false ? F_COMPRESSED : 0 ); // Compressed not supported anymore
190 
191         String name = field.name();
192         String value = field.stringValue();
193 
194         dos.write(flags);
195         dos.writeUTF(name);
196         writeUTF(value, dos);
197     }
198 
199     private static void writeUTF(String str, DataOutput out) throws IOException {
200         int strlen = str.length();
201         int utflen = 0;
202         int c;
203 
204         // use charAt instead of copying String to char array
205         for (int i = 0; i < strlen; i++) {
206             c = str.charAt(i);
207             if ((c >= 0x0001) && (c <= 0x007F)) {
208                 utflen++;
209             } else if (c > 0x07FF) {
210                 utflen += 3;
211             } else {
212                 utflen += 2;
213             }
214         }
215 
216         // TODO optimize storing int value
217         out.writeInt(utflen);
218 
219         byte[] bytearr = new byte[utflen];
220 
221         int count = 0;
222 
223         int i = 0;
224         for (; i < strlen; i++) {
225             c = str.charAt(i);
226             if (!((c >= 0x0001) && (c <= 0x007F))) {
227                 break;
228             }
229             bytearr[count++] = (byte) c;
230         }
231 
232         for (; i < strlen; i++) {
233             c = str.charAt(i);
234             if ((c >= 0x0001) && (c <= 0x007F)) {
235                 bytearr[count++] = (byte) c;
236 
237             } else if (c > 0x07FF) {
238                 bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
239                 bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
240                 bytearr[count++] = (byte) (0x80 | ((c) & 0x3F));
241             } else {
242                 bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
243                 bytearr[count++] = (byte) (0x80 | ((c) & 0x3F));
244             }
245         }
246 
247         out.write(bytearr, 0, utflen);
248     }
249 }