View Javadoc
1   package org.apache.maven.index.updater;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedOutputStream;
23  import java.io.DataOutput;
24  import java.io.DataOutputStream;
25  import java.io.IOException;
26  import java.io.OutputStream;
27  import java.util.ArrayList;
28  import java.util.Date;
29  import java.util.List;
30  import java.util.zip.GZIPOutputStream;
31  import org.apache.lucene.document.Document;
32  import org.apache.lucene.document.Field;
33  import org.apache.lucene.document.StoredField;
34  import org.apache.lucene.index.IndexOptions;
35  import org.apache.lucene.index.IndexReader;
36  import org.apache.lucene.index.IndexableField;
37  import org.apache.lucene.index.MultiBits;
38  import org.apache.lucene.util.Bits;
39  import org.apache.maven.index.ArtifactInfo;
40  import org.apache.maven.index.IndexerField;
41  import org.apache.maven.index.context.DefaultIndexingContext;
42  import org.apache.maven.index.context.IndexingContext;
43  
44  /**
45   * An index data writer used to write transfer index format.
46   * 
47   * @author Eugene Kuleshov
48   */
49  public class IndexDataWriter
50  {
51      static final int VERSION = 1;
52  
53      static final int F_INDEXED = 1;
54  
55      static final int F_TOKENIZED = 2;
56  
57      static final int F_STORED = 4;
58  
59      static final int F_COMPRESSED = 8;
60  
61      private final DataOutputStream dos;
62  
63      private final GZIPOutputStream gos;
64  
65      private final BufferedOutputStream bos;
66  
67      private boolean descriptorWritten;
68  
69      public IndexDataWriter( OutputStream os )
70          throws IOException
71      {
72          bos = new BufferedOutputStream( os, 1024 * 8 );
73          gos = new GZIPOutputStream( bos, 1024 * 2 );
74          dos = new DataOutputStream( gos );
75  
76          this.descriptorWritten = false;
77      }
78  
79      public int write( IndexingContext context, IndexReader indexReader, List<Integer> docIndexes )
80          throws IOException
81      {
82          writeHeader( context );
83  
84          int n = writeDocuments( indexReader, docIndexes );
85  
86          writeGroupFields( context );
87  
88          close();
89  
90          return n;
91      }
92  
93      public void close()
94          throws IOException
95      {
96          dos.flush();
97  
98          gos.flush();
99          gos.finish();
100 
101         bos.flush();
102     }
103 
104     public void writeHeader( IndexingContext context )
105         throws IOException
106     {
107         dos.writeByte( VERSION );
108 
109         Date timestamp = context.getTimestamp();
110         dos.writeLong( timestamp == null ? -1 : timestamp.getTime() );
111     }
112 
113     public void writeGroupFields( IndexingContext context )
114         throws IOException
115     {
116         {
117             List<IndexableField> allGroupsFields = new ArrayList<>( 2 );
118             allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE,
119                                             IndexerField.KEYWORD_STORED ) );
120             allGroupsFields.add( new StoredField( ArtifactInfo.ALL_GROUPS_LIST,
121                     ArtifactInfo.lst2str( context.getAllGroups() ), IndexerField.KEYWORD_STORED ) );
122             writeDocumentFields( allGroupsFields );
123         }
124 
125         {
126             List<IndexableField> rootGroupsFields = new ArrayList<>( 2 );
127             rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE,
128                                              IndexerField.KEYWORD_STORED ) );
129             rootGroupsFields.add( new StoredField( ArtifactInfo.ROOT_GROUPS_LIST,
130                     ArtifactInfo.lst2str( context.getRootGroups() ), IndexerField.KEYWORD_STORED ) );
131             writeDocumentFields( rootGroupsFields );
132         }
133     }
134 
135     public int writeDocuments( IndexReader r, List<Integer> docIndexes )
136         throws IOException
137     {
138         int n = 0;
139         Bits liveDocs = MultiBits.getLiveDocs( r );
140 
141         if ( docIndexes == null )
142         {
143             for ( int i = 0; i < r.maxDoc(); i++ )
144             {
145                 if ( liveDocs == null || liveDocs.get( i ) )
146                 {
147                     if ( writeDocument( r.document( i ) ) )
148                     {
149                         n++;
150                     }
151                 }
152             }
153         }
154         else
155         {
156             for ( int i : docIndexes )
157             {
158                 if ( liveDocs == null || liveDocs.get( i ) )
159                 {
160                     if ( writeDocument( r.document( i ) ) )
161                     {
162                         n++;
163                     }
164                 }
165             }
166         }
167 
168         return n;
169     }
170 
171     public boolean writeDocument( final Document document )
172         throws IOException
173     {
174         List<IndexableField> fields = document.getFields();
175 
176         List<IndexableField> storedFields = new ArrayList<>( fields.size() );
177 
178         for ( IndexableField field : fields )
179         {
180             if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) )
181             {
182                 if ( descriptorWritten )
183                 {
184                     return false;
185                 }
186                 else
187                 {
188                     descriptorWritten = true;
189                 }
190             }
191 
192             if ( field.fieldType().stored() )
193             {
194                 storedFields.add( field );
195             }
196         }
197 
198         writeDocumentFields( storedFields );
199 
200         return true;
201     }
202 
203     public void writeDocumentFields( List<IndexableField> fields )
204         throws IOException
205     {
206         dos.writeInt( fields.size() );
207 
208         for ( IndexableField field : fields )
209         {
210             writeField( field );
211         }
212     }
213 
214     public void writeField( IndexableField field )
215         throws IOException
216     {
217         int flags = ( field.fieldType().indexOptions() != IndexOptions.NONE  ? F_INDEXED : 0 ) //
218             + ( field.fieldType().tokenized() ? F_TOKENIZED : 0 ) //
219             + ( field.fieldType().stored() ? F_STORED : 0 ); //
220         // + ( false ? F_COMPRESSED : 0 ); // Compressed not supported anymore
221 
222         String name = field.name();
223         String value = field.stringValue();
224 
225         dos.write( flags );
226         dos.writeUTF( name );
227         writeUTF( value, dos );
228     }
229 
230     private static void writeUTF( String str, DataOutput out )
231         throws IOException
232     {
233         int strlen = str.length();
234         int utflen = 0;
235         int c;
236 
237         // use charAt instead of copying String to char array
238         for ( int i = 0; i < strlen; i++ )
239         {
240             c = str.charAt( i );
241             if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
242             {
243                 utflen++;
244             }
245             else if ( c > 0x07FF )
246             {
247                 utflen += 3;
248             }
249             else
250             {
251                 utflen += 2;
252             }
253         }
254 
255         // TODO optimize storing int value
256         out.writeInt( utflen );
257 
258         byte[] bytearr = new byte[utflen];
259 
260         int count = 0;
261 
262         int i = 0;
263         for ( ; i < strlen; i++ )
264         {
265             c = str.charAt( i );
266             if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) )
267             {
268                 break;
269             }
270             bytearr[count++] = (byte) c;
271         }
272 
273         for ( ; i < strlen; i++ )
274         {
275             c = str.charAt( i );
276             if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
277             {
278                 bytearr[count++] = (byte) c;
279 
280             }
281             else if ( c > 0x07FF )
282             {
283                 bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) );
284                 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) );
285                 bytearr[count++] = (byte) ( 0x80 | ( ( c ) & 0x3F ) );
286             }
287             else
288             {
289                 bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) );
290                 bytearr[count++] = (byte) ( 0x80 | ( ( c ) & 0x3F ) );
291             }
292         }
293 
294         out.write( bytearr, 0, utflen );
295     }
296 
297 }