001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.maven.scm.provider.git.gitexe.command.changelog;
020
021import java.util.ArrayList;
022import java.util.Calendar;
023import java.util.Date;
024import java.util.List;
025import java.util.Locale;
026import java.util.TimeZone;
027import java.util.regex.Matcher;
028import java.util.regex.Pattern;
029
030import org.apache.maven.scm.ChangeFile;
031import org.apache.maven.scm.ChangeSet;
032import org.apache.maven.scm.ScmFileStatus;
033import org.apache.maven.scm.util.AbstractConsumer;
034
035/**
036 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
037 * @author Olivier Lamy
038 *
039 */
040public class GitChangeLogConsumer extends AbstractConsumer {
041    /**
042     * Date formatter for git timestamp
043     * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
044     */
045    private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
046
047    /**
048     * State machine constant: expecting header
049     */
050    private static final int STATUS_GET_HEADER = 1;
051
052    /**
053     * State machine constant: expecting author information
054     */
055    private static final int STATUS_GET_AUTHOR = 2;
056
057    /**
058     * State machine constant: expecting parent hash information
059     */
060    private static final int STATUS_RAW_TREE = 21;
061
062    /**
063     * State machine constant: expecting parent hash information
064     */
065    private static final int STATUS_RAW_PARENT = 22;
066
067    /**
068     * State machine constant: expecting author name, email and timestamp information
069     */
070    private static final int STATUS_RAW_AUTHOR = 23;
071
072    /**
073     * State machine constant: expecting committer name, email and timestamp information
074     */
075    private static final int STATUS_RAW_COMMITTER = 24;
076
077    /**
078     * State machine constant: expecting date information
079     */
080    private static final int STATUS_GET_DATE = 3;
081
082    /**
083     * State machine constant: expecting file information
084     */
085    private static final int STATUS_GET_FILE = 4;
086
087    /**
088     * State machine constant: expecting comments
089     */
090    private static final int STATUS_GET_COMMENT = 5;
091
092    /**
093     * The pattern used to match git header lines
094     */
095    private static final Pattern HEADER_PATTERN = Pattern.compile("^commit ([A-Fa-f0-9]+)(?: \\((.*)\\))?$");
096
097    /**
098     * The pattern used to match git author lines
099     */
100    private static final Pattern AUTHOR_PATTERN = Pattern.compile("^Author: (.*)");
101
102    /**
103     * The pattern used to match git tree hash lines (raw mode)
104     */
105    private static final Pattern RAW_TREE_PATTERN = Pattern.compile("^tree ([A-Fa-f0-9]+)");
106
107    /**
108     * The pattern used to match git parent hash lines (raw mode)
109     */
110    private static final Pattern RAW_PARENT_PATTERN = Pattern.compile("^parent ([A-Fa-f0-9]+)");
111
112    /**
113     * The pattern used to match git author lines (raw mode)
114     */
115    private static final Pattern RAW_AUTHOR_PATTERN = Pattern.compile("^author (.+ <.+>) ([0-9]+) (.*)");
116
117    /**
118     * The pattern used to match git author lines (raw mode)
119     */
120    private static final Pattern RAW_COMMITTER_PATTERN = Pattern.compile("^committer (.+ <.+>) ([0-9]+) (.*)");
121
122    /**
123     * The pattern used to match git date lines
124     */
125    private static final Pattern DATE_PATTERN = Pattern.compile("^Date:\\s*(.*)");
126
127    /**
128     * The pattern used to match git file lines
129     */
130    private static final Pattern FILE_PATTERN =
131            Pattern.compile("^:\\d* \\d* [A-Fa-f0-9]*\\.* [A-Fa-f0-9]*\\.* ([A-Z])[0-9]*\\t([^\\t]*)(\\t(.*))?");
132
133    /**
134     * Current status of the parser
135     */
136    private int status = STATUS_GET_HEADER;
137
138    /**
139     * List of change log entries
140     */
141    private final List<ChangeSet> entries = new ArrayList<>();
142
143    /**
144     * The current log entry being processed by the parser
145     */
146    private ChangeSet currentChange;
147
148    /**
149     * The current revision of the entry being processed by the parser
150     */
151    private String currentRevision;
152
153    /**
154     * The current comment of the entry being processed by the parser
155     */
156    private StringBuilder currentComment;
157
158    private final String userDateFormat;
159
160    /**
161     * Default constructor.
162     */
163    public GitChangeLogConsumer(String userDateFormat) {
164        this.userDateFormat = userDateFormat;
165    }
166
167    public List<ChangeSet> getModifications() {
168        // this is needed since the processFile does not always get a the end-sequence correctly.
169        processGetFile("");
170
171        return entries;
172    }
173
174    // ----------------------------------------------------------------------
175    // StreamConsumer Implementation
176    // ----------------------------------------------------------------------
177
178    /**
179     * {@inheritDoc}
180     */
181    public void consumeLine(String line) {
182        switch (status) {
183            case STATUS_GET_HEADER:
184                processGetHeader(line);
185                break;
186            case STATUS_GET_AUTHOR:
187                processGetAuthor(line);
188                break;
189            case STATUS_GET_DATE:
190                processGetDate(line, null);
191                break;
192            case STATUS_GET_COMMENT:
193                processGetComment(line);
194                break;
195            case STATUS_GET_FILE:
196                processGetFile(line);
197                break;
198            case STATUS_RAW_TREE:
199                processGetRawTree(line);
200                break;
201            case STATUS_RAW_PARENT:
202                processGetRawParent(line);
203                break;
204            case STATUS_RAW_AUTHOR:
205                processGetRawAuthor(line);
206                break;
207            case STATUS_RAW_COMMITTER:
208                processGetRawCommitter(line);
209                break;
210            default:
211                throw new IllegalStateException("Unknown state: " + status);
212        }
213    }
214
215    // ----------------------------------------------------------------------
216    //
217    // ----------------------------------------------------------------------
218
219    /**
220     * Process the current input line in the GET_HEADER state.  The
221     * author, date, and the revision of the entry are gathered.  Note,
222     * Git does not have per-file revisions, instead, the entire
223     * branch is given a single revision number, which is also used for
224     * the revision number of each file.
225     *
226     * @param line A line of text from the git log output
227     */
228    private void processGetHeader(String line) {
229        Matcher matcher = HEADER_PATTERN.matcher(line);
230        if (!matcher.matches()) {
231            return;
232        }
233
234        currentRevision = matcher.group(1);
235
236        currentChange = new ChangeSet();
237
238        currentChange.setRevision(currentRevision);
239
240        // Extract the tags (if present)
241        String tagList = matcher.group(2);
242        if (tagList != null) {
243            String[] rawTags = tagList.split(",");
244            for (String rawTag : rawTags) {
245                String[] tagParts = rawTag.trim().split(":");
246                if (tagParts.length == 2 && "tag".equals(tagParts[0])) {
247                    currentChange.addTag(tagParts[1].trim());
248                }
249            }
250        }
251
252        status = STATUS_GET_AUTHOR;
253    }
254
255    /**
256     * Process the current input line in the STATUS_GET_AUTHOR state.  This
257     * state gathers all of the author information that are part of a log entry.
258     *
259     * @param line a line of text from the git log output
260     */
261    private void processGetAuthor(String line) {
262        // this autodetects 'raw' format
263        if (RAW_TREE_PATTERN.matcher(line).matches()) {
264            status = STATUS_RAW_TREE;
265            processGetRawTree(line);
266            return;
267        }
268
269        Matcher matcher = AUTHOR_PATTERN.matcher(line);
270        if (!matcher.matches()) {
271            return;
272        }
273        String author = matcher.group(1);
274
275        currentChange.setAuthor(author);
276
277        status = STATUS_GET_DATE;
278    }
279
280    /**
281     * Process the current input line in the STATUS_RAW_TREE state.  This
282     * state gathers tree hash part of a log entry.
283     *
284     * @param line a line of text from the git log output
285     */
286    private void processGetRawTree(String line) {
287        if (!RAW_TREE_PATTERN.matcher(line).matches()) {
288            return;
289        }
290        // here we could set treeHash if it appears in the model: currentChange.setTreeHash( matcher.group( 1 ) );
291        status = STATUS_RAW_PARENT;
292    }
293
294    /**
295     * Process the current input line in the STATUS_RAW_PARENT state.  This
296     * state gathers parent revisions of a log entry.
297     *
298     * @param line a line of text from the git log output
299     */
300    private void processGetRawParent(String line) {
301        Matcher matcher = RAW_PARENT_PATTERN.matcher(line);
302        if (!matcher.matches()) {
303            status = STATUS_RAW_AUTHOR;
304            processGetRawAuthor(line);
305            return;
306        }
307        String parentHash = matcher.group(1);
308
309        addParentRevision(parentHash);
310    }
311
312    /**
313     * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the
314     * log. This method takes care of the difference.
315     *
316     * @param hash -
317     */
318    private void addParentRevision(String hash) {
319        if (currentChange.getParentRevision() == null) {
320            currentChange.setParentRevision(hash);
321        } else {
322            currentChange.addMergedRevision(hash);
323        }
324    }
325
326    /**
327     * Process the current input line in the STATUS_RAW_AUTHOR state.  This
328     * state gathers all the author information of a log entry.
329     *
330     * @param line a line of text from the git log output
331     */
332    private void processGetRawAuthor(String line) {
333        Matcher matcher = RAW_AUTHOR_PATTERN.matcher(line);
334        if (!matcher.matches()) {
335            return;
336        }
337        String author = matcher.group(1);
338        currentChange.setAuthor(author);
339
340        String datestring = matcher.group(2);
341        String tz = matcher.group(3);
342
343        // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
344        // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
345        Calendar c = Calendar.getInstance(TimeZone.getTimeZone(tz));
346        c.setTimeInMillis(Long.parseLong(datestring) * 1000);
347        currentChange.setDate(c.getTime());
348
349        status = STATUS_RAW_COMMITTER;
350    }
351
352    /**
353     * Process the current input line in the STATUS_RAW_AUTHOR state.  This
354     * state gathers all the committer information of a log entry.
355     *
356     * @param line a line of text from the git log output
357     */
358    private void processGetRawCommitter(String line) {
359        if (!RAW_COMMITTER_PATTERN.matcher(line).matches()) {
360            return;
361        }
362        // here we could set committer and committerDate, the same way as in processGetRawAuthor
363        status = STATUS_GET_COMMENT;
364    }
365
366    /**
367     * Process the current input line in the STATUS_GET_DATE state.  This
368     * state gathers all of the date information that are part of a log entry.
369     *
370     * @param line a line of text from the git log output
371     */
372    private void processGetDate(String line, Locale locale) {
373        Matcher matcher = DATE_PATTERN.matcher(line);
374        if (!matcher.matches()) {
375            return;
376        }
377
378        String datestring = matcher.group(1);
379
380        Date date = parseDate(datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale);
381
382        currentChange.setDate(date);
383
384        status = STATUS_GET_COMMENT;
385    }
386
387    /**
388     * Process the current input line in the GET_COMMENT state.  This
389     * state gathers all of the comments that are part of a log entry.
390     *
391     * @param line a line of text from the git log output
392     */
393    private void processGetComment(String line) {
394        if (line.length() < 4) {
395            if (currentComment == null) {
396                currentComment = new StringBuilder();
397            } else {
398                currentChange.setComment(currentComment.toString());
399                status = STATUS_GET_FILE;
400            }
401        } else {
402            if (currentComment.length() > 0) {
403                currentComment.append('\n');
404            }
405
406            currentComment.append(line.substring(4));
407        }
408    }
409
410    /**
411     * Process the current input line in the GET_FILE state.  This state
412     * adds each file entry line to the current change log entry.  Note,
413     * the revision number for the entire entry is used for the revision
414     * number of each file.
415     *
416     * @param line A line of text from the git log output
417     */
418    private void processGetFile(String line) {
419        if (line.length() == 0) {
420            if (currentChange != null) {
421                entries.add(currentChange);
422            }
423
424            resetChangeLog();
425
426            status = STATUS_GET_HEADER;
427        } else {
428            Matcher matcher = FILE_PATTERN.matcher(line);
429            if (!matcher.matches()) {
430                return;
431            }
432            final String actionChar = matcher.group(1);
433            // action is currently not used
434            final ScmFileStatus action;
435            String name = matcher.group(2);
436            String originalName = null;
437            String originalRevision = null;
438            if ("A".equals(actionChar)) {
439                action = ScmFileStatus.ADDED;
440            } else if ("M".equals(actionChar)) {
441                action = ScmFileStatus.MODIFIED;
442            } else if ("D".equals(actionChar)) {
443                action = ScmFileStatus.DELETED;
444            } else if ("R".equals(actionChar)) {
445                action = ScmFileStatus.RENAMED;
446                originalName = name;
447                name = matcher.group(4);
448                originalRevision = currentChange.getParentRevision();
449            } else if ("C".equals(actionChar)) {
450                action = ScmFileStatus.COPIED;
451                originalName = name;
452                name = matcher.group(4);
453                originalRevision = currentChange.getParentRevision();
454            } else {
455                action = ScmFileStatus.UNKNOWN;
456            }
457
458            final ChangeFile changeFile = new ChangeFile(name, currentRevision);
459            changeFile.setAction(action);
460            changeFile.setOriginalName(originalName);
461            changeFile.setOriginalRevision(originalRevision);
462            currentChange.addFile(changeFile);
463        }
464    }
465
466    private void resetChangeLog() {
467        currentComment = null;
468        currentChange = null;
469    }
470}