View Javadoc
1   package org.apache.maven.scm.provider.git.gitexe.command.changelog;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   * http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import org.apache.maven.scm.ChangeFile;
23  import org.apache.maven.scm.ChangeSet;
24  import org.apache.maven.scm.ScmFileStatus;
25  import org.apache.maven.scm.log.ScmLogger;
26  import org.apache.maven.scm.util.AbstractConsumer;
27  
28  import java.util.ArrayList;
29  import java.util.Calendar;
30  import java.util.Date;
31  import java.util.List;
32  import java.util.Locale;
33  import java.util.TimeZone;
34  import java.util.regex.Matcher;
35  import java.util.regex.Pattern;
36  
37  /**
38   * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
39   * @author Olivier Lamy
40   *
41   */
42  public class GitChangeLogConsumer
43      extends AbstractConsumer
44  {
45      /**
46       * Date formatter for git timestamp
47       * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
48       */
49      private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
50  
51      /**
52       * State machine constant: expecting header
53       */
54      private static final int STATUS_GET_HEADER = 1;
55  
56      /**
57       * State machine constant: expecting author information
58       */
59      private static final int STATUS_GET_AUTHOR = 2;
60  
61      /**
62       * State machine constant: expecting parent hash information
63       */
64      private static final int STATUS_RAW_TREE = 21;
65  
66      /**
67       * State machine constant: expecting parent hash information
68       */
69      private static final int STATUS_RAW_PARENT = 22;
70  
71      /**
72       * State machine constant: expecting author name, email and timestamp information
73       */
74      private static final int STATUS_RAW_AUTHOR = 23;
75  
76      /**
77       * State machine constant: expecting committer name, email and timestamp information
78       */
79      private static final int STATUS_RAW_COMMITTER = 24;
80  
81      /**
82       * State machine constant: expecting date information
83       */
84      private static final int STATUS_GET_DATE = 3;
85  
86      /**
87       * State machine constant: expecting file information
88       */
89      private static final int STATUS_GET_FILE = 4;
90  
91      /**
92       * State machine constant: expecting comments
93       */
94      private static final int STATUS_GET_COMMENT = 5;
95  
96      /**
97       * The pattern used to match git header lines
98       */
99      private static final Pattern HEADER_PATTERN = Pattern.compile( "^commit (.*)" );
100 
101     /**
102      * The pattern used to match git author lines
103      */
104     private static final Pattern AUTHOR_PATTERN = Pattern.compile( "^Author: (.*)" );
105 
106     /**
107      * The pattern used to match git tree hash lines (raw mode)
108      */
109     private static final Pattern RAW_TREE_PATTERN = Pattern.compile( "^tree ([A-Fa-f0-9]+)" ); 
110 
111     /**
112      * The pattern used to match git parent hash lines (raw mode)
113      */
114     private static final Pattern RAW_PARENT_PATTERN = Pattern.compile( "^parent ([A-Fa-f0-9]+)" );
115 
116     /**
117      * The pattern used to match git author lines (raw mode)
118      */
119     private static final Pattern RAW_AUTHOR_PATTERN = Pattern.compile( "^author (.+ <.+>) ([0-9]+) (.*)" );
120 
121     /**
122      * The pattern used to match git author lines (raw mode)
123      */
124     private static final Pattern RAW_COMMITTER_PATTERN = Pattern.compile( "^committer (.+ <.+>) ([0-9]+) (.*)" );
125 
126     /**
127      * The pattern used to match git date lines
128      */
129     private static final Pattern DATE_PATTERN = Pattern.compile( "^Date:\\s*(.*)" );
130 
131     /**
132      * The pattern used to match git file lines
133      */
134     private static final Pattern FILE_PATTERN =
135         Pattern.compile( "^:\\d* \\d* [A-Fa-f0-9]*\\.* [A-Fa-f0-9]*\\.* ([A-Z])[0-9]*\\t([^\\t]*)(\\t(.*))?" );
136 
137     /**
138      * Current status of the parser
139      */
140     private int status = STATUS_GET_HEADER;
141 
142     /**
143      * List of change log entries
144      */
145     private List<ChangeSet> entries = new ArrayList<ChangeSet>();
146 
147     /**
148      * The current log entry being processed by the parser
149      */
150     private ChangeSet currentChange;
151 
152     /**
153      * The current revision of the entry being processed by the parser
154      */
155     private String currentRevision;
156 
157     /**
158      * The current comment of the entry being processed by the parser
159      */
160     private StringBuilder currentComment;
161 
162     private String userDateFormat;
163 
164     /**
165      * Default constructor.
166      */
167     public GitChangeLogConsumer( ScmLogger logger, String userDateFormat )
168     {
169         super( logger );
170 
171         this.userDateFormat = userDateFormat;
172     }
173 
174     public List<ChangeSet> getModifications()
175     {
176         // this is needed since the processFile does not always get a the end-sequence correctly.
177         processGetFile( "" );
178 
179         return entries;
180     }
181 
182     // ----------------------------------------------------------------------
183     // StreamConsumer Implementation
184     // ----------------------------------------------------------------------
185 
186     /**
187      * {@inheritDoc}
188      */
189     public void consumeLine( String line )
190     {
191         switch ( status )
192         {
193             case STATUS_GET_HEADER:
194                 processGetHeader( line );
195                 break;
196             case STATUS_GET_AUTHOR:
197                 processGetAuthor( line );
198                 break;
199             case STATUS_GET_DATE:
200                 processGetDate( line, null );
201                 break;
202             case STATUS_GET_COMMENT:
203                 processGetComment( line );
204                 break;
205             case STATUS_GET_FILE:
206                 processGetFile( line );
207                 break;
208             case STATUS_RAW_TREE:
209                 processGetRawTree( line );
210                 break;
211             case STATUS_RAW_PARENT:
212                 processGetRawParent( line );
213                 break;
214             case STATUS_RAW_AUTHOR:
215                 processGetRawAuthor( line );
216                 break;
217             case STATUS_RAW_COMMITTER:
218                 processGetRawCommitter( line );
219                 break;
220             default:
221                 throw new IllegalStateException( "Unknown state: " + status );
222         }
223     }
224 
225     // ----------------------------------------------------------------------
226     //
227     // ----------------------------------------------------------------------
228 
229     /**
230      * Process the current input line in the GET_HEADER state.  The
231      * author, date, and the revision of the entry are gathered.  Note,
232      * Git does not have per-file revisions, instead, the entire
233      * branch is given a single revision number, which is also used for
234      * the revision number of each file.
235      *
236      * @param line A line of text from the git log output
237      */
238     private void processGetHeader( String line )
239     {
240         Matcher matcher = HEADER_PATTERN.matcher( line );
241         if ( !matcher.matches() )
242         {
243             return;
244         }
245 
246         currentRevision = matcher.group( 1 );
247 
248         currentChange = new ChangeSet();
249 
250         currentChange.setRevision( currentRevision );
251 
252         status = STATUS_GET_AUTHOR;
253     }
254 
255     /**
256      * Process the current input line in the STATUS_GET_AUTHOR state.  This
257      * state gathers all of the author information that are part of a log entry.
258      *
259      * @param line a line of text from the git log output
260      */
261     private void processGetAuthor( String line )
262     {
263         // this autodetects 'raw' format
264         if ( RAW_TREE_PATTERN.matcher( line ).matches() )
265         {
266             status = STATUS_RAW_TREE;
267             processGetRawTree( line );
268             return;
269         }
270 
271         Matcher matcher = AUTHOR_PATTERN.matcher( line );
272         if ( !matcher.matches() )
273         {
274             return;
275         }
276         String author = matcher.group( 1 );
277 
278         currentChange.setAuthor( author );
279 
280         status = STATUS_GET_DATE;
281     }
282 
283     /**
284      * Process the current input line in the STATUS_RAW_TREE state.  This
285      * state gathers tree hash part of a log entry.
286      *
287      * @param line a line of text from the git log output
288      */
289     private void processGetRawTree( String line )
290     {
291         if ( !RAW_TREE_PATTERN.matcher( line ).matches() )
292         {
293             return;
294         }
295         //here we could set treeHash if it appears in the model: currentChange.setTreeHash( matcher.group( 1 ) );
296         status = STATUS_RAW_PARENT;
297     }
298 
299     /**
300      * Process the current input line in the STATUS_RAW_PARENT state.  This
301      * state gathers parent revisions of a log entry.
302      *
303      * @param line a line of text from the git log output
304      */
305     private void processGetRawParent( String line )
306     {
307         Matcher matcher = RAW_PARENT_PATTERN.matcher( line );
308         if ( !matcher.matches() )
309         {
310             status = STATUS_RAW_AUTHOR;
311             processGetRawAuthor( line );
312             return;
313         }
314         String parentHash = matcher.group( 1 );
315 
316         addParentRevision( parentHash );
317     }
318 
319     /**
320      * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the log.
321      * This method takes care of the difference.
322      *
323      * @param hash -
324      */
325     private void addParentRevision( String hash )
326     {
327         if ( currentChange.getParentRevision() == null )
328         {
329             currentChange.setParentRevision( hash );
330         }
331         else
332         {
333             currentChange.addMergedRevision( hash );
334         }
335     }
336 
337     /**
338      * Process the current input line in the STATUS_RAW_AUTHOR state.  This
339      * state gathers all the author information of a log entry.
340      *
341      * @param line a line of text from the git log output
342      */
343     private void processGetRawAuthor( String line )
344     {
345         Matcher matcher = RAW_AUTHOR_PATTERN.matcher( line );
346         if ( !matcher.matches() )
347         {
348             return;
349         }
350         String author = matcher.group( 1 );
351         currentChange.setAuthor( author );
352 
353         String datestring = matcher.group( 2 );
354         String tz = matcher.group( 3 );
355 
356         // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
357         // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
358         Calendar c = Calendar.getInstance( TimeZone.getTimeZone( tz ) );
359         c.setTimeInMillis( Long.parseLong( datestring ) * 1000 );
360         currentChange.setDate( c.getTime() );
361 
362         status = STATUS_RAW_COMMITTER;
363     }
364 
365     /**
366      * Process the current input line in the STATUS_RAW_AUTHOR state.  This
367      * state gathers all the committer information of a log entry.
368      *
369      * @param line a line of text from the git log output
370      */
371     private void processGetRawCommitter( String line )
372     {
373         if ( !RAW_COMMITTER_PATTERN.matcher( line ).matches() )
374         {
375             return;
376         }
377         // here we could set committer and committerDate, the same way as in processGetRawAuthor
378         status = STATUS_GET_COMMENT;
379     }
380 
381     /**
382      * Process the current input line in the STATUS_GET_DATE state.  This
383      * state gathers all of the date information that are part of a log entry.
384      *
385      * @param line a line of text from the git log output
386      */
387     private void processGetDate( String line, Locale locale )
388     {
389         Matcher matcher = DATE_PATTERN.matcher( line );
390         if ( !matcher.matches() )
391         {
392             return;
393         }
394 
395         String datestring = matcher.group( 1 );
396 
397         Date date = parseDate( datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale );
398 
399         currentChange.setDate( date );
400 
401         status = STATUS_GET_COMMENT;
402     }
403 
404     /**
405      * Process the current input line in the GET_COMMENT state.  This
406      * state gathers all of the comments that are part of a log entry.
407      *
408      * @param line a line of text from the git log output
409      */
410     private void processGetComment( String line )
411     {
412         if ( line.length() < 4 )
413         {
414             if ( currentComment == null )
415             {
416                 currentComment = new StringBuilder();
417             }
418             else
419             {
420                 currentChange.setComment( currentComment.toString() );
421                 status = STATUS_GET_FILE;
422             }
423         }
424         else
425         {
426             if ( currentComment.length() > 0 )
427             {
428                 currentComment.append( '\n' );
429             }
430 
431             currentComment.append( line.substring( 4 ) );
432         }
433     }
434 
435     /**
436      * Process the current input line in the GET_FILE state.  This state
437      * adds each file entry line to the current change log entry.  Note,
438      * the revision number for the entire entry is used for the revision
439      * number of each file.
440      *
441      * @param line A line of text from the git log output
442      */
443     private void processGetFile( String line )
444     {
445         if ( line.length() == 0 )
446         {
447             if ( currentChange != null )
448             {
449                 entries.add( currentChange );
450             }
451 
452             resetChangeLog();
453 
454             status = STATUS_GET_HEADER;
455         }
456         else
457         {
458             Matcher matcher = FILE_PATTERN.matcher( line );
459             if ( !matcher.matches() )
460             {
461                 return;
462             }
463             final String actionChar = matcher.group( 1 );
464             // action is currently not used
465             final ScmFileStatus action;
466             String name = matcher.group( 2 );
467             String originalName = null;
468             String originalRevision = null;
469             if ( "A".equals( actionChar ) )
470             {
471                 action = ScmFileStatus.ADDED;
472             }
473             else if ( "M".equals( actionChar ) )
474             {
475                 action = ScmFileStatus.MODIFIED;
476             }
477             else if ( "D".equals( actionChar ) )
478             {
479                 action = ScmFileStatus.DELETED;
480             }
481             else if ( "R".equals( actionChar ) )
482             {
483                 action = ScmFileStatus.RENAMED;
484                 originalName = name;
485                 name = matcher.group( 4 );
486                 originalRevision = currentChange.getParentRevision();
487             }
488             else if ( "C".equals( actionChar ) )
489             {
490                 action = ScmFileStatus.COPIED;
491                 originalName = name;
492                 name = matcher.group( 4 );
493                 originalRevision = currentChange.getParentRevision();
494             }
495             else
496             {
497                 action = ScmFileStatus.UNKNOWN;
498             }
499 
500             final ChangeFile changeFile = new ChangeFile( name, currentRevision );
501             changeFile.setAction( action );
502             changeFile.setOriginalName( originalName );
503             changeFile.setOriginalRevision( originalRevision );
504             currentChange.addFile( changeFile );
505         }
506     }
507 
508     private void resetChangeLog()
509     {
510         currentComment = null;
511         currentChange = null;
512     }
513 }