View Javadoc

1   package org.apache.maven.scm.provider.git.gitexe.command.changelog;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   * http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import org.apache.maven.scm.ChangeFile;
23  import org.apache.maven.scm.ChangeSet;
24  import org.apache.maven.scm.ScmFileStatus;
25  import org.apache.maven.scm.log.ScmLogger;
26  import org.apache.maven.scm.util.AbstractConsumer;
27  import org.apache.regexp.RE;
28  import org.apache.regexp.RESyntaxException;
29  
30  import java.util.ArrayList;
31  import java.util.Calendar;
32  import java.util.Date;
33  import java.util.List;
34  import java.util.Locale;
35  import java.util.TimeZone;
36  
37  /**
38   * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
39   * @author Olivier Lamy
40   *
41   */
42  public class GitChangeLogConsumer
43      extends AbstractConsumer
44  {
45      /**
46       * Date formatter for git timestamp
47       * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
48       */
49      private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
50  
51      /**
52       * State machine constant: expecting header
53       */
54      private static final int STATUS_GET_HEADER = 1;
55  
56      /**
57       * State machine constant: expecting author information
58       */
59      private static final int STATUS_GET_AUTHOR = 2;
60  
61      /**
62       * State machine constant: expecting parent hash information
63       */
64      private static final int STATUS_RAW_TREE = 21;
65  
66      /**
67       * State machine constant: expecting parent hash information
68       */
69      private static final int STATUS_RAW_PARENT = 22;
70  
71      /**
72       * State machine constant: expecting author name, email and timestamp information
73       */
74      private static final int STATUS_RAW_AUTHOR = 23;
75  
76      /**
77       * State machine constant: expecting committer name, email and timestamp information
78       */
79      private static final int STATUS_RAW_COMMITTER = 24;
80  
81      /**
82       * State machine constant: expecting date information
83       */
84      private static final int STATUS_GET_DATE = 3;
85  
86      /**
87       * State machine constant: expecting file information
88       */
89      private static final int STATUS_GET_FILE = 4;
90  
91      /**
92       * State machine constant: expecting comments
93       */
94      private static final int STATUS_GET_COMMENT = 5;
95  
96      /**
97       * The pattern used to match git header lines
98       */
99      private static final String HEADER_PATTERN = "^commit (.*)";
100 
101     /**
102      * The pattern used to match git author lines
103      */
104     private static final String AUTHOR_PATTERN = "^Author: (.*)";
105 
106     /**
107      * The pattern used to match git tree hash lines (raw mode)
108      */
109     private static final String RAW_TREE_PATTERN = "^tree ([:xdigit:]+)";
110 
111     /**
112      * The pattern used to match git parent hash lines (raw mode)
113      */
114     private static final String RAW_PARENT_PATTERN = "^parent ([:xdigit:]+)";
115 
116     /**
117      * The pattern used to match git author lines (raw mode)
118      */
119     private static final String RAW_AUTHOR_PATTERN = "^author (.+ <.+>) ([:digit:]+) (.*)";
120 
121     /**
122      * The pattern used to match git author lines (raw mode)
123      */
124     private static final String RAW_COMMITTER_PATTERN = "^committer (.+ <.+>) ([:digit:]+) (.*)";
125 
126     /**
127      * The pattern used to match git date lines
128      */
129     private static final String DATE_PATTERN = "^Date:\\s*(.*)";
130 
131     /**
132      * The pattern used to match git file lines
133      */
134     private static final String FILE_PATTERN =
135         "^:\\d* \\d* [:xdigit:]*\\.* [:xdigit:]*\\.* ([:upper:])[:digit:]*\\t([^\\t]*)(\\t(.*))?";
136 
137     /**
138      * Current status of the parser
139      */
140     private int status = STATUS_GET_HEADER;
141 
142     /**
143      * List of change log entries
144      */
145     private List<ChangeSet> entries = new ArrayList<ChangeSet>();
146 
147     /**
148      * The current log entry being processed by the parser
149      */
150     private ChangeSet currentChange;
151 
152     /**
153      * The current revision of the entry being processed by the parser
154      */
155     private String currentRevision;
156 
157     /**
158      * The current comment of the entry being processed by the parser
159      */
160     private StringBuilder currentComment;
161 
162     /**
163      * The regular expression used to match header lines
164      */
165     private RE headerRegexp;
166 
167     /**
168      * The regular expression used to match author lines
169      */
170     private RE authorRegexp;
171 
172     /**
173      * The regular expression used to match tree hash lines in raw mode
174      */
175     private RE rawTreeRegexp;
176 
177     /**
178      * The regular expression used to match parent hash lines in raw mode
179      */
180     private RE rawParentRegexp;
181 
182     /**
183      * The regular expression used to match author lines in raw mode
184      */
185     private RE rawAuthorRegexp;
186 
187     /**
188      * The regular expression used to match committer lines in raw mode
189      */
190     private RE rawCommitterRegexp;
191 
192     /**
193      * The regular expression used to match date lines
194      */
195     private RE dateRegexp;
196 
197     /**
198      * The regular expression used to match file lines
199      */
200     private RE fileRegexp;
201 
202     private String userDateFormat;
203 
204     /**
205      * Default constructor.
206      */
207     public GitChangeLogConsumer( ScmLogger logger, String userDateFormat )
208     {
209         super( logger );
210 
211         this.userDateFormat = userDateFormat;
212 
213         try
214         {
215             headerRegexp = new RE( HEADER_PATTERN );
216             authorRegexp = new RE( AUTHOR_PATTERN );
217             dateRegexp = new RE( DATE_PATTERN );
218             fileRegexp = new RE( FILE_PATTERN );
219             rawTreeRegexp = new RE( RAW_TREE_PATTERN );
220             rawParentRegexp = new RE( RAW_PARENT_PATTERN );
221             rawAuthorRegexp = new RE( RAW_AUTHOR_PATTERN );
222             rawCommitterRegexp = new RE( RAW_COMMITTER_PATTERN );
223         }
224         catch ( RESyntaxException ex )
225         {
226             throw new RuntimeException(
227                 "INTERNAL ERROR: Could not create regexp to parse git log file. This shouldn't happen. Something is probably wrong with the oro installation.",
228                 ex );
229         }
230     }
231 
232     public List<ChangeSet> getModifications()
233     {
234         // this is needed since the processFile does not always get a the end-sequence correctly.
235         processGetFile( "" );
236 
237         return entries;
238     }
239 
240     // ----------------------------------------------------------------------
241     // StreamConsumer Implementation
242     // ----------------------------------------------------------------------
243 
244     /**
245      * {@inheritDoc}
246      */
247     public void consumeLine( String line )
248     {
249         switch ( status )
250         {
251             case STATUS_GET_HEADER:
252                 processGetHeader( line );
253                 break;
254             case STATUS_GET_AUTHOR:
255                 processGetAuthor( line );
256                 break;
257             case STATUS_GET_DATE:
258                 processGetDate( line, null );
259                 break;
260             case STATUS_GET_COMMENT:
261                 processGetComment( line );
262                 break;
263             case STATUS_GET_FILE:
264                 processGetFile( line );
265                 break;
266             case STATUS_RAW_TREE:
267                 processGetRawTree( line );
268                 break;
269             case STATUS_RAW_PARENT:
270                 processGetRawParent( line );
271                 break;
272             case STATUS_RAW_AUTHOR:
273                 processGetRawAuthor( line );
274                 break;
275             case STATUS_RAW_COMMITTER:
276                 processGetRawCommitter( line );
277                 break;
278             default:
279                 throw new IllegalStateException( "Unknown state: " + status );
280         }
281     }
282 
283     // ----------------------------------------------------------------------
284     //
285     // ----------------------------------------------------------------------
286 
287     /**
288      * Process the current input line in the GET_HEADER state.  The
289      * author, date, and the revision of the entry are gathered.  Note,
290      * Git does not have per-file revisions, instead, the entire
291      * branch is given a single revision number, which is also used for
292      * the revision number of each file.
293      *
294      * @param line A line of text from the git log output
295      */
296     private void processGetHeader( String line )
297     {
298         if ( !headerRegexp.match( line ) )
299         {
300             return;
301         }
302 
303         currentRevision = headerRegexp.getParen( 1 );
304 
305         currentChange = new ChangeSet();
306 
307         currentChange.setRevision( currentRevision );
308 
309         status = STATUS_GET_AUTHOR;
310     }
311 
312     /**
313      * Process the current input line in the STATUS_GET_AUTHOR state.  This
314      * state gathers all of the author information that are part of a log entry.
315      *
316      * @param line a line of text from the git log output
317      */
318     private void processGetAuthor( String line )
319     {
320         // this autodetects 'raw' format
321         if ( rawTreeRegexp.match( line ) )
322         {
323             status = STATUS_RAW_TREE;
324             processGetRawTree( line );
325             return;
326         }
327 
328         if ( !authorRegexp.match( line ) )
329         {
330             return;
331         }
332         String author = authorRegexp.getParen( 1 );
333 
334         currentChange.setAuthor( author );
335 
336         status = STATUS_GET_DATE;
337     }
338 
339     /**
340      * Process the current input line in the STATUS_RAW_TREE state.  This
341      * state gathers tree hash part of a log entry.
342      *
343      * @param line a line of text from the git log output
344      */
345     private void processGetRawTree( String line )
346     {
347         if ( !rawTreeRegexp.match( line ) )
348         {
349             return;
350         }
351         //here we could set treeHash if it appears in the model: currentChange.setTreeHash( rawTreeRegexp.getParen( 1 ) );
352         status = STATUS_RAW_PARENT;
353     }
354 
355     /**
356      * Process the current input line in the STATUS_RAW_PARENT state.  This
357      * state gathers parent revisions of a log entry.
358      *
359      * @param line a line of text from the git log output
360      */
361     private void processGetRawParent( String line )
362     {
363         if ( !rawParentRegexp.match( line ) )
364         {
365             status = STATUS_RAW_AUTHOR;
366             processGetRawAuthor( line );
367             return;
368         }
369         String parentHash = rawParentRegexp.getParen( 1 );
370 
371         addParentRevision( parentHash );
372     }
373 
374     /**
375      * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the log.
376      * This method takes care of the difference.
377      *
378      * @param hash -
379      */
380     private void addParentRevision( String hash )
381     {
382         if ( currentChange.getParentRevision() == null )
383         {
384             currentChange.setParentRevision( hash );
385         }
386         else
387         {
388             currentChange.addMergedRevision( hash );
389         }
390     }
391 
392     /**
393      * Process the current input line in the STATUS_RAW_AUTHOR state.  This
394      * state gathers all the author information of a log entry.
395      *
396      * @param line a line of text from the git log output
397      */
398     private void processGetRawAuthor( String line )
399     {
400         if ( !rawAuthorRegexp.match( line ) )
401         {
402             return;
403         }
404         String author = rawAuthorRegexp.getParen( 1 );
405         currentChange.setAuthor( author );
406 
407         String datestring = rawAuthorRegexp.getParen( 2 );
408         String tz = rawAuthorRegexp.getParen( 3 );
409 
410         // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
411         // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
412         Calendar c = Calendar.getInstance( TimeZone.getTimeZone( tz ) );
413         c.setTimeInMillis( Long.parseLong( datestring ) * 1000 );
414         currentChange.setDate( c.getTime() );
415 
416         status = STATUS_RAW_COMMITTER;
417     }
418 
419     /**
420      * Process the current input line in the STATUS_RAW_AUTHOR state.  This
421      * state gathers all the committer information of a log entry.
422      *
423      * @param line a line of text from the git log output
424      */
425     private void processGetRawCommitter( String line )
426     {
427         if ( !rawCommitterRegexp.match( line ) )
428         {
429             return;
430         }
431         // here we could set committer and committerDate, the same way as in processGetRawAuthor
432         status = STATUS_GET_COMMENT;
433     }
434 
435     /**
436      * Process the current input line in the STATUS_GET_DATE state.  This
437      * state gathers all of the date information that are part of a log entry.
438      *
439      * @param line a line of text from the git log output
440      */
441     private void processGetDate( String line, Locale locale )
442     {
443         if ( !dateRegexp.match( line ) )
444         {
445             return;
446         }
447 
448         String datestring = dateRegexp.getParen( 1 );
449 
450         Date date = parseDate( datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale );
451 
452         currentChange.setDate( date );
453 
454         status = STATUS_GET_COMMENT;
455     }
456 
457     /**
458      * Process the current input line in the GET_COMMENT state.  This
459      * state gathers all of the comments that are part of a log entry.
460      *
461      * @param line a line of text from the git log output
462      */
463     private void processGetComment( String line )
464     {
465         if ( line.length() < 4 )
466         {
467             if ( currentComment == null )
468             {
469                 currentComment = new StringBuilder();
470             }
471             else
472             {
473                 currentChange.setComment( currentComment.toString() );
474                 status = STATUS_GET_FILE;
475             }
476         }
477         else
478         {
479             if ( currentComment.length() > 0 )
480             {
481                 currentComment.append( '\n' );
482             }
483 
484             currentComment.append( line.substring( 4 ) );
485         }
486     }
487 
488     /**
489      * Process the current input line in the GET_FILE state.  This state
490      * adds each file entry line to the current change log entry.  Note,
491      * the revision number for the entire entry is used for the revision
492      * number of each file.
493      *
494      * @param line A line of text from the git log output
495      */
496     private void processGetFile( String line )
497     {
498         if ( line.length() == 0 )
499         {
500             if ( currentChange != null )
501             {
502                 entries.add( currentChange );
503             }
504 
505             resetChangeLog();
506 
507             status = STATUS_GET_HEADER;
508         }
509         else
510         {
511             if ( !fileRegexp.match( line ) )
512             {
513                 return;
514             }
515             final String actionChar = fileRegexp.getParen( 1 );
516             // action is currently not used
517             final ScmFileStatus action;
518             String name = fileRegexp.getParen( 2 );
519             String originalName = null;
520             String originalRevision = null;
521             if ( "A".equals( actionChar ) )
522             {
523                 action = ScmFileStatus.ADDED;
524             }
525             else if ( "M".equals( actionChar ) )
526             {
527                 action = ScmFileStatus.MODIFIED;
528             }
529             else if ( "D".equals( actionChar ) )
530             {
531                 action = ScmFileStatus.DELETED;
532             }
533             else if ( "R".equals( actionChar ) )
534             {
535                 action = ScmFileStatus.RENAMED;
536                 originalName = name;
537                 name = fileRegexp.getParen( 4 );
538                 originalRevision = currentChange.getParentRevision();
539             }
540             else if ( "C".equals( actionChar ) )
541             {
542                 action = ScmFileStatus.COPIED;
543                 originalName = name;
544                 name = fileRegexp.getParen( 4 );
545                 originalRevision = currentChange.getParentRevision();
546             }
547             else
548             {
549                 action = ScmFileStatus.UNKNOWN;
550             }
551 
552             final ChangeFile changeFile = new ChangeFile( name, currentRevision );
553             changeFile.setAction( action );
554             changeFile.setOriginalName( originalName );
555             changeFile.setOriginalRevision( originalRevision );
556             currentChange.addFile( changeFile );
557         }
558     }
559 
560     private void resetChangeLog()
561     {
562         currentComment = null;
563         currentChange = null;
564     }
565 }