View Javadoc
1   package org.apache.maven.index.updater;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedOutputStream;
23  import java.io.DataOutput;
24  import java.io.DataOutputStream;
25  import java.io.IOException;
26  import java.io.OutputStream;
27  import java.util.ArrayList;
28  import java.util.Date;
29  import java.util.HashSet;
30  import java.util.List;
31  import java.util.Set;
32  import java.util.zip.GZIPOutputStream;
33  import org.apache.lucene.document.Document;
34  import org.apache.lucene.document.Field;
35  import org.apache.lucene.index.IndexOptions;
36  import org.apache.lucene.index.IndexReader;
37  import org.apache.lucene.index.IndexableField;
38  import org.apache.lucene.index.MultiBits;
39  import org.apache.lucene.util.Bits;
40  import org.apache.maven.index.ArtifactInfo;
41  import org.apache.maven.index.IndexerField;
42  import org.apache.maven.index.context.DefaultIndexingContext;
43  import org.apache.maven.index.context.IndexingContext;
44  
45  /**
46   * An index data writer used to write transfer index format.
47   * 
48   * @author Eugene Kuleshov
49   */
50  public class IndexDataWriter
51  {
52      static final int VERSION = 1;
53  
54      static final int F_INDEXED = 1;
55  
56      static final int F_TOKENIZED = 2;
57  
58      static final int F_STORED = 4;
59  
60      static final int F_COMPRESSED = 8;
61  
62      private final DataOutputStream dos;
63  
64      private final GZIPOutputStream gos;
65  
66      private final BufferedOutputStream bos;
67  
68      private final Set<String> allGroups;
69  
70      private final Set<String> rootGroups;
71  
72      private boolean descriptorWritten;
73  
74      public IndexDataWriter( OutputStream os )
75          throws IOException
76      {
77          bos = new BufferedOutputStream( os, 1024 * 8 );
78          gos = new GZIPOutputStream( bos, 1024 * 2 );
79          dos = new DataOutputStream( gos );
80  
81          this.allGroups = new HashSet<>();
82          this.rootGroups = new HashSet<>();
83          this.descriptorWritten = false;
84      }
85  
86      public int write( IndexingContext context, IndexReader indexReader, List<Integer> docIndexes )
87          throws IOException
88      {
89          writeHeader( context );
90  
91          int n = writeDocuments( indexReader, docIndexes );
92  
93          writeGroupFields();
94  
95          close();
96  
97          return n;
98      }
99  
100     public void close()
101         throws IOException
102     {
103         dos.flush();
104 
105         gos.flush();
106         gos.finish();
107 
108         bos.flush();
109     }
110 
111     public void writeHeader( IndexingContext context )
112         throws IOException
113     {
114         dos.writeByte( VERSION );
115 
116         Date timestamp = context.getTimestamp();
117         dos.writeLong( timestamp == null ? -1 : timestamp.getTime() );
118     }
119 
120     public void writeGroupFields()
121         throws IOException
122     {
123         {
124             List<IndexableField> allGroupsFields = new ArrayList<>( 2 );
125             allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE,
126                                             IndexerField.KEYWORD_STORED ) );
127             allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS_LIST, ArtifactInfo.lst2str( allGroups ),
128                                             IndexerField.KEYWORD_STORED ) );
129             writeDocumentFields( allGroupsFields );
130         }
131 
132         {
133             List<IndexableField> rootGroupsFields = new ArrayList<>( 2 );
134             rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE,
135                                              IndexerField.KEYWORD_STORED ) );
136             rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS_LIST, ArtifactInfo.lst2str( rootGroups ),
137                                              IndexerField.KEYWORD_STORED ) );
138             writeDocumentFields( rootGroupsFields );
139         }
140     }
141 
142     public int writeDocuments( IndexReader r, List<Integer> docIndexes )
143         throws IOException
144     {
145         int n = 0;
146         Bits liveDocs = MultiBits.getLiveDocs( r );
147 
148         if ( docIndexes == null )
149         {
150             for ( int i = 0; i < r.maxDoc(); i++ )
151             {
152                 if ( liveDocs == null || liveDocs.get( i ) )
153                 {
154                     if ( writeDocument( r.document( i ) ) )
155                     {
156                         n++;
157                     }
158                 }
159             }
160         }
161         else
162         {
163             for ( int i : docIndexes )
164             {
165                 if ( liveDocs == null || liveDocs.get( i ) )
166                 {
167                     if ( writeDocument( r.document( i ) ) )
168                     {
169                         n++;
170                     }
171                 }
172             }
173         }
174 
175         return n;
176     }
177 
178     public boolean writeDocument( final Document document )
179         throws IOException
180     {
181         List<IndexableField> fields = document.getFields();
182 
183         List<IndexableField> storedFields = new ArrayList<>( fields.size() );
184 
185         for ( IndexableField field : fields )
186         {
187             if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) )
188             {
189                 if ( descriptorWritten )
190                 {
191                     return false;
192                 }
193                 else
194                 {
195                     descriptorWritten = true;
196                 }
197             }
198 
199             if ( ArtifactInfo.ALL_GROUPS.equals( field.name() ) )
200             {
201                 final String groupList = document.get( ArtifactInfo.ALL_GROUPS_LIST );
202 
203                 if ( groupList != null && groupList.trim().length() > 0 )
204                 {
205                     allGroups.addAll( ArtifactInfo.str2lst( groupList ) );
206                 }
207 
208                 return false;
209             }
210 
211             if ( ArtifactInfo.ROOT_GROUPS.equals( field.name() ) )
212             {
213                 final String groupList = document.get( ArtifactInfo.ROOT_GROUPS_LIST );
214 
215                 if ( groupList != null && groupList.trim().length() > 0 )
216                 {
217                     rootGroups.addAll( ArtifactInfo.str2lst( groupList ) );
218                 }
219 
220                 return false;
221             }
222 
223             if ( field.fieldType().stored() )
224             {
225                 storedFields.add( field );
226             }
227         }
228 
229         writeDocumentFields( storedFields );
230 
231         return true;
232     }
233 
234     public void writeDocumentFields( List<IndexableField> fields )
235         throws IOException
236     {
237         dos.writeInt( fields.size() );
238 
239         for ( IndexableField field : fields )
240         {
241             writeField( field );
242         }
243     }
244 
245     public void writeField( IndexableField field )
246         throws IOException
247     {
248         int flags = ( field.fieldType().indexOptions() != IndexOptions.NONE  ? F_INDEXED : 0 ) //
249             + ( field.fieldType().tokenized() ? F_TOKENIZED : 0 ) //
250             + ( field.fieldType().stored() ? F_STORED : 0 ); //
251         // + ( false ? F_COMPRESSED : 0 ); // Compressed not supported anymore
252 
253         String name = field.name();
254         String value = field.stringValue();
255 
256         dos.write( flags );
257         dos.writeUTF( name );
258         writeUTF( value, dos );
259     }
260 
261     private static void writeUTF( String str, DataOutput out )
262         throws IOException
263     {
264         int strlen = str.length();
265         int utflen = 0;
266         int c;
267 
268         // use charAt instead of copying String to char array
269         for ( int i = 0; i < strlen; i++ )
270         {
271             c = str.charAt( i );
272             if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
273             {
274                 utflen++;
275             }
276             else if ( c > 0x07FF )
277             {
278                 utflen += 3;
279             }
280             else
281             {
282                 utflen += 2;
283             }
284         }
285 
286         // TODO optimize storing int value
287         out.writeInt( utflen );
288 
289         byte[] bytearr = new byte[utflen];
290 
291         int count = 0;
292 
293         int i = 0;
294         for ( ; i < strlen; i++ )
295         {
296             c = str.charAt( i );
297             if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) )
298             {
299                 break;
300             }
301             bytearr[count++] = (byte) c;
302         }
303 
304         for ( ; i < strlen; i++ )
305         {
306             c = str.charAt( i );
307             if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
308             {
309                 bytearr[count++] = (byte) c;
310 
311             }
312             else if ( c > 0x07FF )
313             {
314                 bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) );
315                 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) );
316                 bytearr[count++] = (byte) ( 0x80 | ( ( c ) & 0x3F ) );
317             }
318             else
319             {
320                 bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) );
321                 bytearr[count++] = (byte) ( 0x80 | ( ( c ) & 0x3F ) );
322             }
323         }
324 
325         out.write( bytearr, 0, utflen );
326     }
327 
328 }