View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.scm.provider.git.gitexe.command.changelog;
20  
21  import java.util.ArrayList;
22  import java.util.Calendar;
23  import java.util.Date;
24  import java.util.List;
25  import java.util.Locale;
26  import java.util.TimeZone;
27  import java.util.regex.Matcher;
28  import java.util.regex.Pattern;
29  
30  import org.apache.maven.scm.ChangeFile;
31  import org.apache.maven.scm.ChangeSet;
32  import org.apache.maven.scm.ScmFileStatus;
33  import org.apache.maven.scm.util.AbstractConsumer;
34  
35  /**
36   * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
37   * @author Olivier Lamy
38   *
39   */
40  public class GitChangeLogConsumer extends AbstractConsumer {
41      /**
42       * Date formatter for git timestamp
43       * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
44       */
45      private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
46  
47      /**
48       * State machine constant: expecting header
49       */
50      private static final int STATUS_GET_HEADER = 1;
51  
52      /**
53       * State machine constant: expecting author information
54       */
55      private static final int STATUS_GET_AUTHOR = 2;
56  
57      /**
58       * State machine constant: expecting parent hash information
59       */
60      private static final int STATUS_RAW_TREE = 21;
61  
62      /**
63       * State machine constant: expecting parent hash information
64       */
65      private static final int STATUS_RAW_PARENT = 22;
66  
67      /**
68       * State machine constant: expecting author name, email and timestamp information
69       */
70      private static final int STATUS_RAW_AUTHOR = 23;
71  
72      /**
73       * State machine constant: expecting committer name, email and timestamp information
74       */
75      private static final int STATUS_RAW_COMMITTER = 24;
76  
77      /**
78       * State machine constant: expecting date information
79       */
80      private static final int STATUS_GET_DATE = 3;
81  
82      /**
83       * State machine constant: expecting file information
84       */
85      private static final int STATUS_GET_FILE = 4;
86  
87      /**
88       * State machine constant: expecting comments
89       */
90      private static final int STATUS_GET_COMMENT = 5;
91  
92      /**
93       * The pattern used to match git header lines
94       */
95      private static final Pattern HEADER_PATTERN = Pattern.compile("^commit ([A-Fa-f0-9]+)(?: \\((.*)\\))?$");
96  
97      /**
98       * The pattern used to match git author lines
99       */
100     private static final Pattern AUTHOR_PATTERN = Pattern.compile("^Author: (.*)");
101 
102     /**
103      * The pattern used to match git tree hash lines (raw mode)
104      */
105     private static final Pattern RAW_TREE_PATTERN = Pattern.compile("^tree ([A-Fa-f0-9]+)");
106 
107     /**
108      * The pattern used to match git parent hash lines (raw mode)
109      */
110     private static final Pattern RAW_PARENT_PATTERN = Pattern.compile("^parent ([A-Fa-f0-9]+)");
111 
112     /**
113      * The pattern used to match git author lines (raw mode)
114      */
115     private static final Pattern RAW_AUTHOR_PATTERN = Pattern.compile("^author (.+ <.+>) ([0-9]+) (.*)");
116 
117     /**
118      * The pattern used to match git author lines (raw mode)
119      */
120     private static final Pattern RAW_COMMITTER_PATTERN = Pattern.compile("^committer (.+ <.+>) ([0-9]+) (.*)");
121 
122     /**
123      * The pattern used to match git date lines
124      */
125     private static final Pattern DATE_PATTERN = Pattern.compile("^Date:\\s*(.*)");
126 
127     /**
128      * The pattern used to match git file lines
129      */
130     private static final Pattern FILE_PATTERN =
131             Pattern.compile("^:\\d* \\d* [A-Fa-f0-9]*\\.* [A-Fa-f0-9]*\\.* ([A-Z])[0-9]*\\t([^\\t]*)(\\t(.*))?");
132 
133     /**
134      * Current status of the parser
135      */
136     private int status = STATUS_GET_HEADER;
137 
138     /**
139      * List of change log entries
140      */
141     private final List<ChangeSet> entries = new ArrayList<>();
142 
143     /**
144      * The current log entry being processed by the parser
145      */
146     private ChangeSet currentChange;
147 
148     /**
149      * The current revision of the entry being processed by the parser
150      */
151     private String currentRevision;
152 
153     /**
154      * The current comment of the entry being processed by the parser
155      */
156     private StringBuilder currentComment;
157 
158     private final String userDateFormat;
159 
160     /**
161      * Default constructor.
162      */
163     public GitChangeLogConsumer(String userDateFormat) {
164         this.userDateFormat = userDateFormat;
165     }
166 
167     public List<ChangeSet> getModifications() {
168         // this is needed since the processFile does not always get a the end-sequence correctly.
169         processGetFile("");
170 
171         return entries;
172     }
173 
174     // ----------------------------------------------------------------------
175     // StreamConsumer Implementation
176     // ----------------------------------------------------------------------
177 
178     /**
179      * {@inheritDoc}
180      */
181     public void consumeLine(String line) {
182         switch (status) {
183             case STATUS_GET_HEADER:
184                 processGetHeader(line);
185                 break;
186             case STATUS_GET_AUTHOR:
187                 processGetAuthor(line);
188                 break;
189             case STATUS_GET_DATE:
190                 processGetDate(line, null);
191                 break;
192             case STATUS_GET_COMMENT:
193                 processGetComment(line);
194                 break;
195             case STATUS_GET_FILE:
196                 processGetFile(line);
197                 break;
198             case STATUS_RAW_TREE:
199                 processGetRawTree(line);
200                 break;
201             case STATUS_RAW_PARENT:
202                 processGetRawParent(line);
203                 break;
204             case STATUS_RAW_AUTHOR:
205                 processGetRawAuthor(line);
206                 break;
207             case STATUS_RAW_COMMITTER:
208                 processGetRawCommitter(line);
209                 break;
210             default:
211                 throw new IllegalStateException("Unknown state: " + status);
212         }
213     }
214 
215     // ----------------------------------------------------------------------
216     //
217     // ----------------------------------------------------------------------
218 
219     /**
220      * Process the current input line in the GET_HEADER state.  The
221      * author, date, and the revision of the entry are gathered.  Note,
222      * Git does not have per-file revisions, instead, the entire
223      * branch is given a single revision number, which is also used for
224      * the revision number of each file.
225      *
226      * @param line A line of text from the git log output
227      */
228     private void processGetHeader(String line) {
229         Matcher matcher = HEADER_PATTERN.matcher(line);
230         if (!matcher.matches()) {
231             return;
232         }
233 
234         currentRevision = matcher.group(1);
235 
236         currentChange = new ChangeSet();
237 
238         currentChange.setRevision(currentRevision);
239 
240         // Extract the tags (if present)
241         String tagList = matcher.group(2);
242         if (tagList != null) {
243             String[] rawTags = tagList.split(",");
244             for (String rawTag : rawTags) {
245                 String[] tagParts = rawTag.trim().split(":");
246                 if (tagParts.length == 2 && "tag".equals(tagParts[0])) {
247                     currentChange.addTag(tagParts[1].trim());
248                 }
249             }
250         }
251 
252         status = STATUS_GET_AUTHOR;
253     }
254 
255     /**
256      * Process the current input line in the STATUS_GET_AUTHOR state.  This
257      * state gathers all of the author information that are part of a log entry.
258      *
259      * @param line a line of text from the git log output
260      */
261     private void processGetAuthor(String line) {
262         // this autodetects 'raw' format
263         if (RAW_TREE_PATTERN.matcher(line).matches()) {
264             status = STATUS_RAW_TREE;
265             processGetRawTree(line);
266             return;
267         }
268 
269         Matcher matcher = AUTHOR_PATTERN.matcher(line);
270         if (!matcher.matches()) {
271             return;
272         }
273         String author = matcher.group(1);
274 
275         currentChange.setAuthor(author);
276 
277         status = STATUS_GET_DATE;
278     }
279 
280     /**
281      * Process the current input line in the STATUS_RAW_TREE state.  This
282      * state gathers tree hash part of a log entry.
283      *
284      * @param line a line of text from the git log output
285      */
286     private void processGetRawTree(String line) {
287         if (!RAW_TREE_PATTERN.matcher(line).matches()) {
288             return;
289         }
290         // here we could set treeHash if it appears in the model: currentChange.setTreeHash( matcher.group( 1 ) );
291         status = STATUS_RAW_PARENT;
292     }
293 
294     /**
295      * Process the current input line in the STATUS_RAW_PARENT state.  This
296      * state gathers parent revisions of a log entry.
297      *
298      * @param line a line of text from the git log output
299      */
300     private void processGetRawParent(String line) {
301         Matcher matcher = RAW_PARENT_PATTERN.matcher(line);
302         if (!matcher.matches()) {
303             status = STATUS_RAW_AUTHOR;
304             processGetRawAuthor(line);
305             return;
306         }
307         String parentHash = matcher.group(1);
308 
309         addParentRevision(parentHash);
310     }
311 
312     /**
313      * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the
314      * log. This method takes care of the difference.
315      *
316      * @param hash -
317      */
318     private void addParentRevision(String hash) {
319         if (currentChange.getParentRevision() == null) {
320             currentChange.setParentRevision(hash);
321         } else {
322             currentChange.addMergedRevision(hash);
323         }
324     }
325 
326     /**
327      * Process the current input line in the STATUS_RAW_AUTHOR state.  This
328      * state gathers all the author information of a log entry.
329      *
330      * @param line a line of text from the git log output
331      */
332     private void processGetRawAuthor(String line) {
333         Matcher matcher = RAW_AUTHOR_PATTERN.matcher(line);
334         if (!matcher.matches()) {
335             return;
336         }
337         String author = matcher.group(1);
338         currentChange.setAuthor(author);
339 
340         String datestring = matcher.group(2);
341         String tz = matcher.group(3);
342 
343         // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
344         // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
345         Calendar c = Calendar.getInstance(TimeZone.getTimeZone(tz));
346         c.setTimeInMillis(Long.parseLong(datestring) * 1000);
347         currentChange.setDate(c.getTime());
348 
349         status = STATUS_RAW_COMMITTER;
350     }
351 
352     /**
353      * Process the current input line in the STATUS_RAW_AUTHOR state.  This
354      * state gathers all the committer information of a log entry.
355      *
356      * @param line a line of text from the git log output
357      */
358     private void processGetRawCommitter(String line) {
359         if (!RAW_COMMITTER_PATTERN.matcher(line).matches()) {
360             return;
361         }
362         // here we could set committer and committerDate, the same way as in processGetRawAuthor
363         status = STATUS_GET_COMMENT;
364     }
365 
366     /**
367      * Process the current input line in the STATUS_GET_DATE state.  This
368      * state gathers all of the date information that are part of a log entry.
369      *
370      * @param line a line of text from the git log output
371      */
372     private void processGetDate(String line, Locale locale) {
373         Matcher matcher = DATE_PATTERN.matcher(line);
374         if (!matcher.matches()) {
375             return;
376         }
377 
378         String datestring = matcher.group(1);
379 
380         Date date = parseDate(datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale);
381 
382         currentChange.setDate(date);
383 
384         status = STATUS_GET_COMMENT;
385     }
386 
387     /**
388      * Process the current input line in the GET_COMMENT state.  This
389      * state gathers all of the comments that are part of a log entry.
390      *
391      * @param line a line of text from the git log output
392      */
393     private void processGetComment(String line) {
394         if (line.length() < 4) {
395             if (currentComment == null) {
396                 currentComment = new StringBuilder();
397             } else {
398                 currentChange.setComment(currentComment.toString());
399                 status = STATUS_GET_FILE;
400             }
401         } else {
402             if (currentComment.length() > 0) {
403                 currentComment.append('\n');
404             }
405 
406             currentComment.append(line.substring(4));
407         }
408     }
409 
410     /**
411      * Process the current input line in the GET_FILE state.  This state
412      * adds each file entry line to the current change log entry.  Note,
413      * the revision number for the entire entry is used for the revision
414      * number of each file.
415      *
416      * @param line A line of text from the git log output
417      */
418     private void processGetFile(String line) {
419         if (line.length() == 0) {
420             if (currentChange != null) {
421                 entries.add(currentChange);
422             }
423 
424             resetChangeLog();
425 
426             status = STATUS_GET_HEADER;
427         } else {
428             Matcher matcher = FILE_PATTERN.matcher(line);
429             if (!matcher.matches()) {
430                 return;
431             }
432             final String actionChar = matcher.group(1);
433             // action is currently not used
434             final ScmFileStatus action;
435             String name = matcher.group(2);
436             String originalName = null;
437             String originalRevision = null;
438             if ("A".equals(actionChar)) {
439                 action = ScmFileStatus.ADDED;
440             } else if ("M".equals(actionChar)) {
441                 action = ScmFileStatus.MODIFIED;
442             } else if ("D".equals(actionChar)) {
443                 action = ScmFileStatus.DELETED;
444             } else if ("R".equals(actionChar)) {
445                 action = ScmFileStatus.RENAMED;
446                 originalName = name;
447                 name = matcher.group(4);
448                 originalRevision = currentChange.getParentRevision();
449             } else if ("C".equals(actionChar)) {
450                 action = ScmFileStatus.COPIED;
451                 originalName = name;
452                 name = matcher.group(4);
453                 originalRevision = currentChange.getParentRevision();
454             } else {
455                 action = ScmFileStatus.UNKNOWN;
456             }
457 
458             final ChangeFile changeFile = new ChangeFile(name, currentRevision);
459             changeFile.setAction(action);
460             changeFile.setOriginalName(originalName);
461             changeFile.setOriginalRevision(originalRevision);
462             currentChange.addFile(changeFile);
463         }
464     }
465 
466     private void resetChangeLog() {
467         currentComment = null;
468         currentChange = null;
469     }
470 }