View Javadoc
1   package org.apache.maven.scm.provider.git.gitexe.command.changelog;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   * http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import org.apache.maven.scm.ChangeFile;
23  import org.apache.maven.scm.ChangeSet;
24  import org.apache.maven.scm.ScmFileStatus;
25  import org.apache.maven.scm.util.AbstractConsumer;
26  
27  import java.util.ArrayList;
28  import java.util.Calendar;
29  import java.util.Date;
30  import java.util.List;
31  import java.util.Locale;
32  import java.util.TimeZone;
33  import java.util.regex.Matcher;
34  import java.util.regex.Pattern;
35  
36  /**
37   * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
38   * @author Olivier Lamy
39   *
40   */
41  public class GitChangeLogConsumer
42      extends AbstractConsumer
43  {
44      /**
45       * Date formatter for git timestamp
46       * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
47       */
48      private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
49  
50      /**
51       * State machine constant: expecting header
52       */
53      private static final int STATUS_GET_HEADER = 1;
54  
55      /**
56       * State machine constant: expecting author information
57       */
58      private static final int STATUS_GET_AUTHOR = 2;
59  
60      /**
61       * State machine constant: expecting parent hash information
62       */
63      private static final int STATUS_RAW_TREE = 21;
64  
65      /**
66       * State machine constant: expecting parent hash information
67       */
68      private static final int STATUS_RAW_PARENT = 22;
69  
70      /**
71       * State machine constant: expecting author name, email and timestamp information
72       */
73      private static final int STATUS_RAW_AUTHOR = 23;
74  
75      /**
76       * State machine constant: expecting committer name, email and timestamp information
77       */
78      private static final int STATUS_RAW_COMMITTER = 24;
79  
80      /**
81       * State machine constant: expecting date information
82       */
83      private static final int STATUS_GET_DATE = 3;
84  
85      /**
86       * State machine constant: expecting file information
87       */
88      private static final int STATUS_GET_FILE = 4;
89  
90      /**
91       * State machine constant: expecting comments
92       */
93      private static final int STATUS_GET_COMMENT = 5;
94  
95      /**
96       * The pattern used to match git header lines
97       */
98      private static final Pattern HEADER_PATTERN = Pattern.compile( "^commit ([A-Fa-f0-9]+)(?: \\((.*)\\))?$" );
99  
100     /**
101      * The pattern used to match git author lines
102      */
103     private static final Pattern AUTHOR_PATTERN = Pattern.compile( "^Author: (.*)" );
104 
105     /**
106      * The pattern used to match git tree hash lines (raw mode)
107      */
108     private static final Pattern RAW_TREE_PATTERN = Pattern.compile( "^tree ([A-Fa-f0-9]+)" ); 
109 
110     /**
111      * The pattern used to match git parent hash lines (raw mode)
112      */
113     private static final Pattern RAW_PARENT_PATTERN = Pattern.compile( "^parent ([A-Fa-f0-9]+)" );
114 
115     /**
116      * The pattern used to match git author lines (raw mode)
117      */
118     private static final Pattern RAW_AUTHOR_PATTERN = Pattern.compile( "^author (.+ <.+>) ([0-9]+) (.*)" );
119 
120     /**
121      * The pattern used to match git author lines (raw mode)
122      */
123     private static final Pattern RAW_COMMITTER_PATTERN = Pattern.compile( "^committer (.+ <.+>) ([0-9]+) (.*)" );
124 
125     /**
126      * The pattern used to match git date lines
127      */
128     private static final Pattern DATE_PATTERN = Pattern.compile( "^Date:\\s*(.*)" );
129 
130     /**
131      * The pattern used to match git file lines
132      */
133     private static final Pattern FILE_PATTERN =
134         Pattern.compile( "^:\\d* \\d* [A-Fa-f0-9]*\\.* [A-Fa-f0-9]*\\.* ([A-Z])[0-9]*\\t([^\\t]*)(\\t(.*))?" );
135 
136     /**
137      * Current status of the parser
138      */
139     private int status = STATUS_GET_HEADER;
140 
141     /**
142      * List of change log entries
143      */
144     private final List<ChangeSet> entries = new ArrayList<>();
145 
146     /**
147      * The current log entry being processed by the parser
148      */
149     private ChangeSet currentChange;
150 
151     /**
152      * The current revision of the entry being processed by the parser
153      */
154     private String currentRevision;
155 
156     /**
157      * The current comment of the entry being processed by the parser
158      */
159     private StringBuilder currentComment;
160 
161     private final String userDateFormat;
162 
163     /**
164      * Default constructor.
165      */
166     public GitChangeLogConsumer( String userDateFormat )
167     {
168         this.userDateFormat = userDateFormat;
169     }
170 
171     public List<ChangeSet> getModifications()
172     {
173         // this is needed since the processFile does not always get a the end-sequence correctly.
174         processGetFile( "" );
175 
176         return entries;
177     }
178 
179     // ----------------------------------------------------------------------
180     // StreamConsumer Implementation
181     // ----------------------------------------------------------------------
182 
183     /**
184      * {@inheritDoc}
185      */
186     public void consumeLine( String line )
187     {
188         switch ( status )
189         {
190             case STATUS_GET_HEADER:
191                 processGetHeader( line );
192                 break;
193             case STATUS_GET_AUTHOR:
194                 processGetAuthor( line );
195                 break;
196             case STATUS_GET_DATE:
197                 processGetDate( line, null );
198                 break;
199             case STATUS_GET_COMMENT:
200                 processGetComment( line );
201                 break;
202             case STATUS_GET_FILE:
203                 processGetFile( line );
204                 break;
205             case STATUS_RAW_TREE:
206                 processGetRawTree( line );
207                 break;
208             case STATUS_RAW_PARENT:
209                 processGetRawParent( line );
210                 break;
211             case STATUS_RAW_AUTHOR:
212                 processGetRawAuthor( line );
213                 break;
214             case STATUS_RAW_COMMITTER:
215                 processGetRawCommitter( line );
216                 break;
217             default:
218                 throw new IllegalStateException( "Unknown state: " + status );
219         }
220     }
221 
222     // ----------------------------------------------------------------------
223     //
224     // ----------------------------------------------------------------------
225 
226     /**
227      * Process the current input line in the GET_HEADER state.  The
228      * author, date, and the revision of the entry are gathered.  Note,
229      * Git does not have per-file revisions, instead, the entire
230      * branch is given a single revision number, which is also used for
231      * the revision number of each file.
232      *
233      * @param line A line of text from the git log output
234      */
235     private void processGetHeader( String line )
236     {
237         Matcher matcher = HEADER_PATTERN.matcher( line );
238         if ( !matcher.matches() )
239         {
240             return;
241         }
242 
243         currentRevision = matcher.group( 1 );
244 
245         currentChange = new ChangeSet();
246 
247         currentChange.setRevision( currentRevision );
248 
249         // Extract the tags (if present)
250         String tagList = matcher.group( 2 );
251         if ( tagList != null )
252         {
253             String[] rawTags = tagList.split( "," );
254             for ( String rawTag : rawTags )
255             {
256                 String[] tagParts = rawTag.trim().split( ":" );
257                 if ( tagParts.length == 2 && "tag".equals( tagParts[0] ) )
258                 {
259                     currentChange.addTag( tagParts[1].trim() );
260                 }
261             }
262         }
263 
264         status = STATUS_GET_AUTHOR;
265     }
266 
267     /**
268      * Process the current input line in the STATUS_GET_AUTHOR state.  This
269      * state gathers all of the author information that are part of a log entry.
270      *
271      * @param line a line of text from the git log output
272      */
273     private void processGetAuthor( String line )
274     {
275         // this autodetects 'raw' format
276         if ( RAW_TREE_PATTERN.matcher( line ).matches() )
277         {
278             status = STATUS_RAW_TREE;
279             processGetRawTree( line );
280             return;
281         }
282 
283         Matcher matcher = AUTHOR_PATTERN.matcher( line );
284         if ( !matcher.matches() )
285         {
286             return;
287         }
288         String author = matcher.group( 1 );
289 
290         currentChange.setAuthor( author );
291 
292         status = STATUS_GET_DATE;
293     }
294 
295     /**
296      * Process the current input line in the STATUS_RAW_TREE state.  This
297      * state gathers tree hash part of a log entry.
298      *
299      * @param line a line of text from the git log output
300      */
301     private void processGetRawTree( String line )
302     {
303         if ( !RAW_TREE_PATTERN.matcher( line ).matches() )
304         {
305             return;
306         }
307         //here we could set treeHash if it appears in the model: currentChange.setTreeHash( matcher.group( 1 ) );
308         status = STATUS_RAW_PARENT;
309     }
310 
311     /**
312      * Process the current input line in the STATUS_RAW_PARENT state.  This
313      * state gathers parent revisions of a log entry.
314      *
315      * @param line a line of text from the git log output
316      */
317     private void processGetRawParent( String line )
318     {
319         Matcher matcher = RAW_PARENT_PATTERN.matcher( line );
320         if ( !matcher.matches() )
321         {
322             status = STATUS_RAW_AUTHOR;
323             processGetRawAuthor( line );
324             return;
325         }
326         String parentHash = matcher.group( 1 );
327 
328         addParentRevision( parentHash );
329     }
330 
331     /**
332      * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the
333      * log. This method takes care of the difference.
334      *
335      * @param hash -
336      */
337     private void addParentRevision( String hash )
338     {
339         if ( currentChange.getParentRevision() == null )
340         {
341             currentChange.setParentRevision( hash );
342         }
343         else
344         {
345             currentChange.addMergedRevision( hash );
346         }
347     }
348 
349     /**
350      * Process the current input line in the STATUS_RAW_AUTHOR state.  This
351      * state gathers all the author information of a log entry.
352      *
353      * @param line a line of text from the git log output
354      */
355     private void processGetRawAuthor( String line )
356     {
357         Matcher matcher = RAW_AUTHOR_PATTERN.matcher( line );
358         if ( !matcher.matches() )
359         {
360             return;
361         }
362         String author = matcher.group( 1 );
363         currentChange.setAuthor( author );
364 
365         String datestring = matcher.group( 2 );
366         String tz = matcher.group( 3 );
367 
368         // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
369         // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
370         Calendar c = Calendar.getInstance( TimeZone.getTimeZone( tz ) );
371         c.setTimeInMillis( Long.parseLong( datestring ) * 1000 );
372         currentChange.setDate( c.getTime() );
373 
374         status = STATUS_RAW_COMMITTER;
375     }
376 
377     /**
378      * Process the current input line in the STATUS_RAW_AUTHOR state.  This
379      * state gathers all the committer information of a log entry.
380      *
381      * @param line a line of text from the git log output
382      */
383     private void processGetRawCommitter( String line )
384     {
385         if ( !RAW_COMMITTER_PATTERN.matcher( line ).matches() )
386         {
387             return;
388         }
389         // here we could set committer and committerDate, the same way as in processGetRawAuthor
390         status = STATUS_GET_COMMENT;
391     }
392 
393     /**
394      * Process the current input line in the STATUS_GET_DATE state.  This
395      * state gathers all of the date information that are part of a log entry.
396      *
397      * @param line a line of text from the git log output
398      */
399     private void processGetDate( String line, Locale locale )
400     {
401         Matcher matcher = DATE_PATTERN.matcher( line );
402         if ( !matcher.matches() )
403         {
404             return;
405         }
406 
407         String datestring = matcher.group( 1 );
408 
409         Date date = parseDate( datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale );
410 
411         currentChange.setDate( date );
412 
413         status = STATUS_GET_COMMENT;
414     }
415 
416     /**
417      * Process the current input line in the GET_COMMENT state.  This
418      * state gathers all of the comments that are part of a log entry.
419      *
420      * @param line a line of text from the git log output
421      */
422     private void processGetComment( String line )
423     {
424         if ( line.length() < 4 )
425         {
426             if ( currentComment == null )
427             {
428                 currentComment = new StringBuilder();
429             }
430             else
431             {
432                 currentChange.setComment( currentComment.toString() );
433                 status = STATUS_GET_FILE;
434             }
435         }
436         else
437         {
438             if ( currentComment.length() > 0 )
439             {
440                 currentComment.append( '\n' );
441             }
442 
443             currentComment.append( line.substring( 4 ) );
444         }
445     }
446 
447     /**
448      * Process the current input line in the GET_FILE state.  This state
449      * adds each file entry line to the current change log entry.  Note,
450      * the revision number for the entire entry is used for the revision
451      * number of each file.
452      *
453      * @param line A line of text from the git log output
454      */
455     private void processGetFile( String line )
456     {
457         if ( line.length() == 0 )
458         {
459             if ( currentChange != null )
460             {
461                 entries.add( currentChange );
462             }
463 
464             resetChangeLog();
465 
466             status = STATUS_GET_HEADER;
467         }
468         else
469         {
470             Matcher matcher = FILE_PATTERN.matcher( line );
471             if ( !matcher.matches() )
472             {
473                 return;
474             }
475             final String actionChar = matcher.group( 1 );
476             // action is currently not used
477             final ScmFileStatus action;
478             String name = matcher.group( 2 );
479             String originalName = null;
480             String originalRevision = null;
481             if ( "A".equals( actionChar ) )
482             {
483                 action = ScmFileStatus.ADDED;
484             }
485             else if ( "M".equals( actionChar ) )
486             {
487                 action = ScmFileStatus.MODIFIED;
488             }
489             else if ( "D".equals( actionChar ) )
490             {
491                 action = ScmFileStatus.DELETED;
492             }
493             else if ( "R".equals( actionChar ) )
494             {
495                 action = ScmFileStatus.RENAMED;
496                 originalName = name;
497                 name = matcher.group( 4 );
498                 originalRevision = currentChange.getParentRevision();
499             }
500             else if ( "C".equals( actionChar ) )
501             {
502                 action = ScmFileStatus.COPIED;
503                 originalName = name;
504                 name = matcher.group( 4 );
505                 originalRevision = currentChange.getParentRevision();
506             }
507             else
508             {
509                 action = ScmFileStatus.UNKNOWN;
510             }
511 
512             final ChangeFile changeFile = new ChangeFile( name, currentRevision );
513             changeFile.setAction( action );
514             changeFile.setOriginalName( originalName );
515             changeFile.setOriginalRevision( originalRevision );
516             currentChange.addFile( changeFile );
517         }
518     }
519 
520     private void resetChangeLog()
521     {
522         currentComment = null;
523         currentChange = null;
524     }
525 }