View Javadoc

1   package org.apache.maven.index.updater;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedOutputStream;
23  import java.io.DataOutput;
24  import java.io.DataOutputStream;
25  import java.io.IOException;
26  import java.io.OutputStream;
27  import java.util.ArrayList;
28  import java.util.Date;
29  import java.util.HashSet;
30  import java.util.List;
31  import java.util.Set;
32  import java.util.zip.GZIPOutputStream;
33  
34  import org.apache.lucene.document.Document;
35  import org.apache.lucene.document.Field;
36  import org.apache.lucene.document.Fieldable;
37  import org.apache.lucene.index.IndexReader;
38  import org.apache.lucene.search.IndexSearcher;
39  import org.apache.maven.index.ArtifactInfo;
40  import org.apache.maven.index.context.DefaultIndexingContext;
41  import org.apache.maven.index.context.IndexingContext;
42  
43  /**
44   * An index data writer used to write transfer index format.
45   * 
46   * @author Eugene Kuleshov
47   */
48  public class IndexDataWriter
49  {
50      static final int VERSION = 1;
51  
52      static final int F_INDEXED = 1;
53  
54      static final int F_TOKENIZED = 2;
55  
56      static final int F_STORED = 4;
57  
58      static final int F_COMPRESSED = 8;
59  
60      private final DataOutputStream dos;
61  
62      private final GZIPOutputStream gos;
63  
64      private final BufferedOutputStream bos;
65  
66      private final Set<String> allGroups;
67  
68      private final Set<String> rootGroups;
69  
70      private boolean descriptorWritten;
71  
72      public IndexDataWriter( OutputStream os )
73          throws IOException
74      {
75          bos = new BufferedOutputStream( os, 1024 * 8 );
76          gos = new GZIPOutputStream( bos, 1024 * 2 );
77          dos = new DataOutputStream( gos );
78  
79          this.allGroups = new HashSet<String>();
80          this.rootGroups = new HashSet<String>();
81          this.descriptorWritten = false;
82      }
83  
84      public int write( IndexingContext context, List<Integer> docIndexes )
85          throws IOException
86      {
87          writeHeader( context );
88  
89          int n = 0;
90          final IndexSearcher indexSearcher = context.acquireIndexSearcher();
91          try
92          {
93              n = writeDocuments( indexSearcher.getIndexReader(), docIndexes );
94          }
95          finally
96          {
97              context.releaseIndexSearcher( indexSearcher );
98          }
99  
100         writeGroupFields();
101 
102         close();
103 
104         return n;
105     }
106 
107     public void close()
108         throws IOException
109     {
110         dos.flush();
111 
112         gos.flush();
113         gos.finish();
114 
115         bos.flush();
116     }
117 
118     public void writeHeader( IndexingContext context )
119         throws IOException
120     {
121         dos.writeByte( VERSION );
122 
123         Date timestamp = context.getTimestamp();
124         dos.writeLong( timestamp == null ? -1 : timestamp.getTime() );
125     }
126 
127     public void writeGroupFields()
128         throws IOException
129     {
130         {
131             List<Fieldable> allGroupsFields = new ArrayList<Fieldable>( 2 );
132             allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, Field.Store.YES,
133                 Field.Index.NOT_ANALYZED ) );
134             allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS_LIST, ArtifactInfo.lst2str( allGroups ),
135                 Field.Store.YES, Field.Index.NO ) );
136             writeDocumentFields( allGroupsFields );
137         }
138 
139         {
140             List<Fieldable> rootGroupsFields = new ArrayList<Fieldable>( 2 );
141             rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE, Field.Store.YES,
142                 Field.Index.NOT_ANALYZED ) );
143             rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS_LIST, ArtifactInfo.lst2str( rootGroups ),
144                 Field.Store.YES, Field.Index.NO ) );
145             writeDocumentFields( rootGroupsFields );
146         }
147     }
148 
149     public int writeDocuments( IndexReader r, List<Integer> docIndexes )
150         throws IOException
151     {
152         int n = 0;
153 
154         if ( docIndexes == null )
155         {
156             for ( int i = 0; i < r.maxDoc(); i++ )
157             {
158                 if ( !r.isDeleted( i ) )
159                 {
160                     if ( writeDocument( r.document( i ) ) )
161                     {
162                         n++;
163                     }
164                 }
165             }
166         }
167         else
168         {
169             for ( int i : docIndexes )
170             {
171                 if ( !r.isDeleted( i ) )
172                 {
173                     if ( writeDocument( r.document( i ) ) )
174                     {
175                         n++;
176                     }
177                 }
178             }
179         }
180 
181         return n;
182     }
183 
184     public boolean writeDocument( final Document document )
185         throws IOException
186     {
187         List<Fieldable> fields = document.getFields();
188 
189         List<Fieldable> storedFields = new ArrayList<Fieldable>( fields.size() );
190 
191         for ( Fieldable field : fields )
192         {
193             if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) )
194             {
195                 if ( descriptorWritten )
196                 {
197                     return false;
198                 }
199                 else
200                 {
201                     descriptorWritten = true;
202                 }
203             }
204 
205             if ( ArtifactInfo.ALL_GROUPS.equals( field.name() ) )
206             {
207                 final String groupList = document.get( ArtifactInfo.ALL_GROUPS_LIST );
208 
209                 if ( groupList != null && groupList.trim().length() > 0 )
210                 {
211                     allGroups.addAll( ArtifactInfo.str2lst( groupList ) );
212                 }
213 
214                 return false;
215             }
216 
217             if ( ArtifactInfo.ROOT_GROUPS.equals( field.name() ) )
218             {
219                 final String groupList = document.get( ArtifactInfo.ROOT_GROUPS_LIST );
220 
221                 if ( groupList != null && groupList.trim().length() > 0 )
222                 {
223                     rootGroups.addAll( ArtifactInfo.str2lst( groupList ) );
224                 }
225 
226                 return false;
227             }
228 
229             if ( field.isStored() )
230             {
231                 storedFields.add( field );
232             }
233         }
234 
235         writeDocumentFields( storedFields );
236 
237         return true;
238     }
239 
240     public void writeDocumentFields( List<Fieldable> fields )
241         throws IOException
242     {
243         dos.writeInt( fields.size() );
244 
245         for ( Fieldable field : fields )
246         {
247             writeField( field );
248         }
249     }
250 
251     public void writeField( Fieldable field )
252         throws IOException
253     {
254         int flags = ( field.isIndexed() ? F_INDEXED : 0 ) //
255             + ( field.isTokenized() ? F_TOKENIZED : 0 ) //
256             + ( field.isStored() ? F_STORED : 0 ); //
257         // + ( false ? F_COMPRESSED : 0 ); // Compressed not supported anymore
258 
259         String name = field.name();
260         String value = field.stringValue();
261 
262         dos.write( flags );
263         dos.writeUTF( name );
264         writeUTF( value, dos );
265     }
266 
267     private static void writeUTF( String str, DataOutput out )
268         throws IOException
269     {
270         int strlen = str.length();
271         int utflen = 0;
272         int c;
273 
274         // use charAt instead of copying String to char array
275         for ( int i = 0; i < strlen; i++ )
276         {
277             c = str.charAt( i );
278             if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
279             {
280                 utflen++;
281             }
282             else if ( c > 0x07FF )
283             {
284                 utflen += 3;
285             }
286             else
287             {
288                 utflen += 2;
289             }
290         }
291 
292         // TODO optimize storing int value
293         out.writeInt( utflen );
294 
295         byte[] bytearr = new byte[utflen];
296 
297         int count = 0;
298 
299         int i = 0;
300         for ( ; i < strlen; i++ )
301         {
302             c = str.charAt( i );
303             if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) )
304             {
305                 break;
306             }
307             bytearr[count++] = (byte) c;
308         }
309 
310         for ( ; i < strlen; i++ )
311         {
312             c = str.charAt( i );
313             if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
314             {
315                 bytearr[count++] = (byte) c;
316 
317             }
318             else if ( c > 0x07FF )
319             {
320                 bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) );
321                 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) );
322                 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) );
323             }
324             else
325             {
326                 bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) );
327                 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) );
328             }
329         }
330 
331         out.write( bytearr, 0, utflen );
332     }
333 
334 }