View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.scm.provider.git.gitexe.command.changelog;
20  
21  import java.util.ArrayList;
22  import java.util.Calendar;
23  import java.util.Date;
24  import java.util.List;
25  import java.util.Locale;
26  import java.util.TimeZone;
27  import java.util.regex.Matcher;
28  import java.util.regex.Pattern;
29  
30  import org.apache.maven.scm.ChangeFile;
31  import org.apache.maven.scm.ChangeSet;
32  import org.apache.maven.scm.ScmFileStatus;
33  import org.apache.maven.scm.util.AbstractConsumer;
34  
35  /**
36   * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
37   * @author Olivier Lamy
38   */
39  public class GitChangeLogConsumer extends AbstractConsumer {
40      /**
41       * Date formatter for git timestamp
42       * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200.
43       */
44      private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
45  
46      /**
47       * State machine constant: expecting header.
48       */
49      private static final int STATUS_GET_HEADER = 1;
50  
51      /**
52       * State machine constant: expecting author information.
53       */
54      private static final int STATUS_GET_AUTHOR = 2;
55  
56      /**
57       * State machine constant: expecting parent hash information.
58       */
59      private static final int STATUS_RAW_TREE = 21;
60  
61      /**
62       * State machine constant: expecting parent hash information.
63       */
64      private static final int STATUS_RAW_PARENT = 22;
65  
66      /**
67       * State machine constant: expecting author name, email and timestamp information.
68       */
69      private static final int STATUS_RAW_AUTHOR = 23;
70  
71      /**
72       * State machine constant: expecting committer name, email and timestamp information.
73       */
74      private static final int STATUS_RAW_COMMITTER = 24;
75  
76      /**
77       * State machine constant: expecting date information.
78       */
79      private static final int STATUS_GET_DATE = 3;
80  
81      /**
82       * State machine constant: expecting file information.
83       */
84      private static final int STATUS_GET_FILE = 4;
85  
86      /**
87       * State machine constant: expecting comments.
88       */
89      private static final int STATUS_GET_COMMENT = 5;
90  
91      /**
92       * The pattern used to match git header lines.
93       */
94      private static final Pattern HEADER_PATTERN = Pattern.compile("^commit ([A-Fa-f0-9]+)(?: \\((.*)\\))?$");
95  
96      /**
97       * The pattern used to match git author lines.
98       */
99      private static final Pattern AUTHOR_PATTERN = Pattern.compile("^Author: (.*)");
100 
101     /**
102      * The pattern used to match git tree hash lines (raw mode)
103      */
104     private static final Pattern RAW_TREE_PATTERN = Pattern.compile("^tree ([A-Fa-f0-9]+)");
105 
106     /**
107      * The pattern used to match git parent hash lines (raw mode)
108      */
109     private static final Pattern RAW_PARENT_PATTERN = Pattern.compile("^parent ([A-Fa-f0-9]+)");
110 
111     /**
112      * The pattern used to match git author lines (raw mode)
113      */
114     private static final Pattern RAW_AUTHOR_PATTERN = Pattern.compile("^author (.+ <.+>) ([0-9]+) (.*)");
115 
116     /**
117      * The pattern used to match git author lines (raw mode)
118      */
119     private static final Pattern RAW_COMMITTER_PATTERN = Pattern.compile("^committer (.+ <.+>) ([0-9]+) (.*)");
120 
121     /**
122      * The pattern used to match git date lines.
123      */
124     private static final Pattern DATE_PATTERN = Pattern.compile("^Date:\\s*(.*)");
125 
126     /**
127      * The pattern used to match git file lines.
128      */
129     private static final Pattern FILE_PATTERN =
130             Pattern.compile("^:\\d* \\d* [A-Fa-f0-9]*\\.* [A-Fa-f0-9]*\\.* ([A-Z])[0-9]*\\t([^\\t]*)(\\t(.*))?");
131 
132     /**
133      * Current status of the parser.
134      */
135     private int status = STATUS_GET_HEADER;
136 
137     /**
138      * List of change log entries.
139      */
140     private final List<ChangeSet> entries = new ArrayList<>();
141 
142     /**
143      * The current log entry being processed by the parser.
144      */
145     private ChangeSet currentChange;
146 
147     /**
148      * The current revision of the entry being processed by the parser.
149      */
150     private String currentRevision;
151 
152     /**
153      * The current comment of the entry being processed by the parser.
154      */
155     private StringBuilder currentComment;
156 
157     private final String userDateFormat;
158 
159     /**
160      * Default constructor.
161      */
162     public GitChangeLogConsumer(String userDateFormat) {
163         this.userDateFormat = userDateFormat;
164     }
165 
166     public List<ChangeSet> getModifications() {
167         // this is needed since the processFile does not always get a the end-sequence correctly.
168         processGetFile("");
169 
170         return entries;
171     }
172 
173     // ----------------------------------------------------------------------
174     // StreamConsumer Implementation
175     // ----------------------------------------------------------------------
176 
177     /**
178      * {@inheritDoc}
179      */
180     public void consumeLine(String line) {
181         switch (status) {
182             case STATUS_GET_HEADER:
183                 processGetHeader(line);
184                 break;
185             case STATUS_GET_AUTHOR:
186                 processGetAuthor(line);
187                 break;
188             case STATUS_GET_DATE:
189                 processGetDate(line, null);
190                 break;
191             case STATUS_GET_COMMENT:
192                 processGetComment(line);
193                 break;
194             case STATUS_GET_FILE:
195                 processGetFile(line);
196                 break;
197             case STATUS_RAW_TREE:
198                 processGetRawTree(line);
199                 break;
200             case STATUS_RAW_PARENT:
201                 processGetRawParent(line);
202                 break;
203             case STATUS_RAW_AUTHOR:
204                 processGetRawAuthor(line);
205                 break;
206             case STATUS_RAW_COMMITTER:
207                 processGetRawCommitter(line);
208                 break;
209             default:
210                 throw new IllegalStateException("Unknown state: " + status);
211         }
212     }
213 
214     // ----------------------------------------------------------------------
215     //
216     // ----------------------------------------------------------------------
217 
218     /**
219      * Process the current input line in the GET_HEADER state.  The
220      * author, date, and the revision of the entry are gathered.  Note,
221      * Git does not have per-file revisions, instead, the entire
222      * branch is given a single revision number, which is also used for
223      * the revision number of each file.
224      *
225      * @param line a line of text from the git log output
226      */
227     private void processGetHeader(String line) {
228         Matcher matcher = HEADER_PATTERN.matcher(line);
229         if (!matcher.matches()) {
230             return;
231         }
232 
233         currentRevision = matcher.group(1);
234 
235         currentChange = new ChangeSet();
236 
237         currentChange.setRevision(currentRevision);
238 
239         // Extract the tags (if present)
240         String tagList = matcher.group(2);
241         if (tagList != null) {
242             String[] rawTags = tagList.split(",");
243             for (String rawTag : rawTags) {
244                 String[] tagParts = rawTag.trim().split(":");
245                 if (tagParts.length == 2 && "tag".equals(tagParts[0])) {
246                     currentChange.addTag(tagParts[1].trim());
247                 }
248             }
249         }
250 
251         status = STATUS_GET_AUTHOR;
252     }
253 
254     /**
255      * Process the current input line in the STATUS_GET_AUTHOR state.  This
256      * state gathers all of the author information that are part of a log entry.
257      *
258      * @param line a line of text from the git log output
259      */
260     private void processGetAuthor(String line) {
261         // this autodetects 'raw' format
262         if (RAW_TREE_PATTERN.matcher(line).matches()) {
263             status = STATUS_RAW_TREE;
264             processGetRawTree(line);
265             return;
266         }
267 
268         Matcher matcher = AUTHOR_PATTERN.matcher(line);
269         if (!matcher.matches()) {
270             return;
271         }
272         String author = matcher.group(1);
273 
274         currentChange.setAuthor(author);
275 
276         status = STATUS_GET_DATE;
277     }
278 
279     /**
280      * Process the current input line in the STATUS_RAW_TREE state.  This
281      * state gathers tree hash part of a log entry.
282      *
283      * @param line a line of text from the git log output
284      */
285     private void processGetRawTree(String line) {
286         if (!RAW_TREE_PATTERN.matcher(line).matches()) {
287             return;
288         }
289         // here we could set treeHash if it appears in the model: currentChange.setTreeHash( matcher.group( 1 ) );
290         status = STATUS_RAW_PARENT;
291     }
292 
293     /**
294      * Process the current input line in the STATUS_RAW_PARENT state.  This
295      * state gathers parent revisions of a log entry.
296      *
297      * @param line a line of text from the git log output
298      */
299     private void processGetRawParent(String line) {
300         Matcher matcher = RAW_PARENT_PATTERN.matcher(line);
301         if (!matcher.matches()) {
302             status = STATUS_RAW_AUTHOR;
303             processGetRawAuthor(line);
304             return;
305         }
306         String parentHash = matcher.group(1);
307 
308         addParentRevision(parentHash);
309     }
310 
311     /**
312      * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the
313      * log. This method takes care of the difference.
314      *
315      * @param hash -
316      */
317     private void addParentRevision(String hash) {
318         if (currentChange.getParentRevision() == null) {
319             currentChange.setParentRevision(hash);
320         } else {
321             currentChange.addMergedRevision(hash);
322         }
323     }
324 
325     /**
326      * Process the current input line in the STATUS_RAW_AUTHOR state.  This
327      * state gathers all the author information of a log entry.
328      *
329      * @param line a line of text from the git log output
330      */
331     private void processGetRawAuthor(String line) {
332         Matcher matcher = RAW_AUTHOR_PATTERN.matcher(line);
333         if (!matcher.matches()) {
334             return;
335         }
336         String author = matcher.group(1);
337         currentChange.setAuthor(author);
338 
339         String datestring = matcher.group(2);
340         String tz = matcher.group(3);
341 
342         // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
343         // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
344         Calendar c = Calendar.getInstance(TimeZone.getTimeZone(tz));
345         c.setTimeInMillis(Long.parseLong(datestring) * 1000);
346         currentChange.setDate(c.getTime());
347 
348         status = STATUS_RAW_COMMITTER;
349     }
350 
351     /**
352      * Process the current input line in the STATUS_RAW_AUTHOR state.  This
353      * state gathers all the committer information of a log entry.
354      *
355      * @param line a line of text from the git log output
356      */
357     private void processGetRawCommitter(String line) {
358         if (!RAW_COMMITTER_PATTERN.matcher(line).matches()) {
359             return;
360         }
361         // here we could set committer and committerDate, the same way as in processGetRawAuthor
362         status = STATUS_GET_COMMENT;
363     }
364 
365     /**
366      * Process the current input line in the STATUS_GET_DATE state.  This
367      * state gathers all of the date information that are part of a log entry.
368      *
369      * @param line a line of text from the git log output
370      */
371     private void processGetDate(String line, Locale locale) {
372         Matcher matcher = DATE_PATTERN.matcher(line);
373         if (!matcher.matches()) {
374             return;
375         }
376 
377         String datestring = matcher.group(1);
378 
379         Date date = parseDate(datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale);
380 
381         currentChange.setDate(date);
382 
383         status = STATUS_GET_COMMENT;
384     }
385 
386     /**
387      * Process the current input line in the GET_COMMENT state.  This
388      * state gathers all of the comments that are part of a log entry.
389      *
390      * @param line a line of text from the git log output
391      */
392     private void processGetComment(String line) {
393         if (line.length() < 4) {
394             if (currentComment == null) {
395                 currentComment = new StringBuilder();
396             } else {
397                 currentChange.setComment(currentComment.toString());
398                 status = STATUS_GET_FILE;
399             }
400         } else {
401             if (currentComment.length() > 0) {
402                 currentComment.append('\n');
403             }
404 
405             currentComment.append(line.substring(4));
406         }
407     }
408 
409     /**
410      * Process the current input line in the GET_FILE state.  This state
411      * adds each file entry line to the current change log entry.  Note,
412      * the revision number for the entire entry is used for the revision
413      * number of each file.
414      *
415      * @param line a line of text from the git log output
416      */
417     private void processGetFile(String line) {
418         if (line.length() == 0) {
419             if (currentChange != null) {
420                 entries.add(currentChange);
421             }
422 
423             resetChangeLog();
424 
425             status = STATUS_GET_HEADER;
426         } else {
427             Matcher matcher = FILE_PATTERN.matcher(line);
428             if (!matcher.matches()) {
429                 return;
430             }
431             final String actionChar = matcher.group(1);
432             // action is currently not used
433             final ScmFileStatus action;
434             String name = matcher.group(2);
435             String originalName = null;
436             String originalRevision = null;
437             if ("A".equals(actionChar)) {
438                 action = ScmFileStatus.ADDED;
439             } else if ("M".equals(actionChar)) {
440                 action = ScmFileStatus.MODIFIED;
441             } else if ("D".equals(actionChar)) {
442                 action = ScmFileStatus.DELETED;
443             } else if ("R".equals(actionChar)) {
444                 action = ScmFileStatus.RENAMED;
445                 originalName = name;
446                 name = matcher.group(4);
447                 originalRevision = currentChange.getParentRevision();
448             } else if ("C".equals(actionChar)) {
449                 action = ScmFileStatus.COPIED;
450                 originalName = name;
451                 name = matcher.group(4);
452                 originalRevision = currentChange.getParentRevision();
453             } else {
454                 action = ScmFileStatus.UNKNOWN;
455             }
456 
457             final ChangeFile changeFile = new ChangeFile(name, currentRevision);
458             changeFile.setAction(action);
459             changeFile.setOriginalName(originalName);
460             changeFile.setOriginalRevision(originalRevision);
461             currentChange.addFile(changeFile);
462         }
463     }
464 
465     private void resetChangeLog() {
466         currentComment = null;
467         currentChange = null;
468     }
469 }