001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.maven.scm.provider.git.gitexe.command.status;
020
021import java.io.File;
022import java.io.UnsupportedEncodingException;
023import java.net.URI;
024import java.net.URISyntaxException;
025import java.util.ArrayList;
026import java.util.List;
027import java.util.regex.Matcher;
028import java.util.regex.Pattern;
029
030import org.apache.maven.scm.ScmFile;
031import org.apache.maven.scm.ScmFileSet;
032import org.apache.maven.scm.ScmFileStatus;
033import org.apache.maven.scm.util.AbstractConsumer;
034
035/**
036 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
037 */
038public class GitStatusConsumer extends AbstractConsumer {
039
    /**
     * Matches lines reporting an added file: an {@code A} at the start of the line, followed by any
     * number of spaces or {@code M} characters, a space, and the file entry (captured as group 1).
     */
    private static final Pattern ADDED_PATTERN = Pattern.compile("^A[ M]* (.*)$");

    /**
     * Matches lines reporting a modified file: optional leading spaces, an {@code M}, any number of
     * spaces or further {@code M} characters, a space, and the file entry (captured as group 1).
     */
    private static final Pattern MODIFIED_PATTERN = Pattern.compile("^ *M[ M]* (.*)$");

    /**
     * Matches lines reporting a deleted file: optional leading spaces, a {@code D}, optional spaces,
     * a space, and the file entry (captured as group 1).
     */
    private static final Pattern DELETED_PATTERN = Pattern.compile("^ *D * (.*)$");

    /**
     * Matches lines reporting a renamed file: an {@code R} followed by exactly two spaces, the old
     * entry (group 1), the separator {@code " -> "} and the new entry (group 2).
     */
    private static final Pattern RENAMED_PATTERN = Pattern.compile("^R  (.*) -> (.*)$");

    /**
     * Directory against which reported paths are checked for existence. When {@code null}, the
     * existence checks in {@link #consumeLine(String)} are skipped.
     */
    private final File workingDirectory;

    /**
     * Optional file set; when not {@code null}, only entries accepted for its file list are recorded
     * in {@link #changedFiles}.
     */
    private ScmFileSet scmFileSet;

    /**
     * Entries are relative to the working directory, not to the repository root.
     */
    private final List<ScmFile> changedFiles = new ArrayList<>();

    /**
     * Optional URI against which every reported file entry is relativized; {@code null} leaves the
     * entries as reported (apart from unquoting).
     */
    private URI relativeRepositoryPath;
070
071    // ----------------------------------------------------------------------
072    //
073    // ----------------------------------------------------------------------
074
075    /**
076     * Consumer when workingDirectory and repositoryRootDirectory are the same
077     *
078     * @param workingDirectory the working directory
079     */
080    public GitStatusConsumer(File workingDirectory) {
081        this.workingDirectory = workingDirectory;
082    }
083
084    /**
085     * Assuming that you have to discover the repositoryRoot, this is how you can get the
086     * <code>relativeRepositoryPath</code>
087     * <pre>
088     * URI.create( repositoryRoot ).relativize( fileSet.getBasedir().toURI() )
089     * </pre>
090     *
091     * @param workingDirectory the working directory
092     * @param relativeRepositoryPath the working directory relative to the repository root
093     * @since 1.9
094     * @see GitStatusCommand#createRevparseShowPrefix(ScmFileSet)
095     */
096    public GitStatusConsumer(File workingDirectory, URI relativeRepositoryPath) {
097        this(workingDirectory);
098        this.relativeRepositoryPath = relativeRepositoryPath;
099    }
100
101    /**
102     * Assuming that you have to discover the repositoryRoot, this is how you can get the
103     * <code>relativeRepositoryPath</code>
104     * <pre>
105     * URI.create( repositoryRoot ).relativize( fileSet.getBasedir().toURI() )
106     * </pre>
107     *
108     * @param workingDirectory the working directory
109     * @param scmFileSet fileset with includes and excludes
110     * @since 1.11.0
111     * @see GitStatusCommand#createRevparseShowPrefix(ScmFileSet)
112     */
113    public GitStatusConsumer(File workingDirectory, ScmFileSet scmFileSet) {
114        this(workingDirectory);
115        this.scmFileSet = scmFileSet;
116    }
117
118    /**
119     * Assuming that you have to discover the repositoryRoot, this is how you can get the
120     * <code>relativeRepositoryPath</code>
121     * <pre>
122     * URI.create( repositoryRoot ).relativize( fileSet.getBasedir().toURI() )
123     * </pre>
124     *
125     * @param workingDirectory the working directory
126     * @param relativeRepositoryPath the working directory relative to the repository root
127     * @param scmFileSet fileset with includes and excludes
128     * @since 1.11.0
129     * @see GitStatusCommand#createRevparseShowPrefix(ScmFileSet)
130     */
131    public GitStatusConsumer(File workingDirectory, URI relativeRepositoryPath, ScmFileSet scmFileSet) {
132        this(workingDirectory, scmFileSet);
133        this.relativeRepositoryPath = relativeRepositoryPath;
134    }
135
136    // ----------------------------------------------------------------------
137    // StreamConsumer Implementation
138    // ----------------------------------------------------------------------
139
140    /**
141     * {@inheritDoc}
142     */
143    public void consumeLine(String line) {
144        if (logger.isDebugEnabled()) {
145            logger.debug(line);
146        }
147        if (line == null || line.isEmpty()) {
148            return;
149        }
150
151        ScmFileStatus status = null;
152
153        List<String> files = new ArrayList<>();
154
155        Matcher matcher;
156        if ((matcher = ADDED_PATTERN.matcher(line)).find()) {
157            status = ScmFileStatus.ADDED;
158            files.add(resolvePath(matcher.group(1), relativeRepositoryPath));
159        } else if ((matcher = MODIFIED_PATTERN.matcher(line)).find()) {
160            status = ScmFileStatus.MODIFIED;
161            files.add(resolvePath(matcher.group(1), relativeRepositoryPath));
162        } else if ((matcher = DELETED_PATTERN.matcher(line)).find()) {
163            status = ScmFileStatus.DELETED;
164            files.add(resolvePath(matcher.group(1), relativeRepositoryPath));
165        } else if ((matcher = RENAMED_PATTERN.matcher(line)).find()) {
166            status = ScmFileStatus.RENAMED;
167            files.add(resolvePath(matcher.group(1), relativeRepositoryPath));
168            files.add(resolvePath(matcher.group(2), relativeRepositoryPath));
169            logger.debug("RENAMED status for line '" + line + "' files added '" + matcher.group(1) + "' '"
170                    + matcher.group(2));
171        } else {
172            logger.warn("Ignoring unrecognized line: " + line);
173            return;
174        }
175
176        // If the file isn't a file; don't add it.
177        if (!files.isEmpty()) {
178            if (workingDirectory != null) {
179                if (status == ScmFileStatus.RENAMED) {
180                    String oldFilePath = files.get(0);
181                    String newFilePath = files.get(1);
182                    if (isFile(oldFilePath)) {
183                        logger.debug("file '" + oldFilePath + "' is a file");
184                        return;
185                    } else {
186                        logger.debug("file '" + oldFilePath + "' not a file");
187                    }
188                    if (!isFile(newFilePath)) {
189                        logger.debug("file '" + newFilePath + "' not a file");
190                        return;
191                    } else {
192                        logger.debug("file '" + newFilePath + "' is a file");
193                    }
194                } else if (status == ScmFileStatus.DELETED) {
195                    if (isFile(files.get(0))) {
196                        return;
197                    }
198                } else {
199                    if (!isFile(files.get(0))) {
200                        return;
201                    }
202                }
203            }
204
205            for (String file : files) {
206                if (this.scmFileSet != null && !isFileNameInFileList(this.scmFileSet.getFileList(), file)) {
207                    // skip adding this file
208                } else {
209                    changedFiles.add(new ScmFile(file, status));
210                }
211            }
212        }
213    }
214
215    private boolean isFileNameInFileList(List<File> fileList, String fileName) {
216        if (relativeRepositoryPath == null) {
217            return fileList.contains(new File(fileName));
218        } else {
219            for (File f : fileList) {
220                File file = new File(relativeRepositoryPath.getPath(), fileName);
221                if (file.getPath().endsWith(f.getName())) {
222                    return true;
223                }
224            }
225            return fileList.isEmpty();
226        }
227    }
228
229    private boolean isFile(String file) {
230        File targetFile = new File(workingDirectory, file);
231        return targetFile.isFile();
232    }
233
234    public static String resolvePath(String fileEntry, URI path) {
235        /* Quotes may be included (from the git status line) when an fileEntry includes spaces */
236        String cleanedEntry = stripQuotes(fileEntry);
237        if (path != null) {
238            return resolveURI(cleanedEntry, path).getPath();
239        } else {
240            return cleanedEntry;
241        }
242    }
243
244    /**
245     *
246     * @param fileEntry the fileEntry, must not be {@code null}
247     * @param path the path, must not be {@code null}
248     * @return TODO
249     */
250    public static URI resolveURI(String fileEntry, URI path) {
251        // When using URI.create, spaces need to be escaped but not the slashes, so we can't use
252        // URLEncoder.encode( String, String )
253        // new File( String ).toURI() results in an absolute URI while path is relative, so that can't be used either.
254        return path.relativize(uriFromPath(stripQuotes(fileEntry)));
255    }
256
257    /**
258     * Create an URI whose getPath() returns the given path and getScheme() returns null. The path may contain spaces,
259     * colons, and other special characters.
260     *
261     * @param path the path.
262     * @return the new URI
263     */
264    public static URI uriFromPath(String path) {
265        try {
266            if (path != null && path.indexOf(':') != -1) {
267                // prefixing the path so the part preceding the colon does not become the scheme
268                String tmp = new URI(null, null, "/x" + path, null).toString().substring(2);
269                // the colon is not escaped by default
270                return new URI(tmp.replace(":", "%3A"));
271            } else {
272                return new URI(null, null, path, null);
273            }
274        } catch (URISyntaxException x) {
275            throw new IllegalArgumentException(x.getMessage(), x);
276        }
277    }
278
279    public List<ScmFile> getChangedFiles() {
280        return changedFiles;
281    }
282
283    /**
284     * @param str the (potentially quoted) string, must not be {@code null}
285     * @return the string with a pair of double quotes removed (if they existed)
286     */
287    private static String stripQuotes(String str) {
288        int strLen = str.length();
289        return (strLen > 0 && str.startsWith("\"") && str.endsWith("\""))
290                ? unescape(str.substring(1, strLen - 1))
291                : str;
292    }
293
294    /**
295     * Dequote a quoted string generated by git status --porcelain.
296     * The leading and trailing quotes have already been removed.
297     * @param fileEntry
298     * @return TODO
299     */
300    private static String unescape(String fileEntry) {
301        // If there are no escaped characters, just return the input argument
302        int pos = fileEntry.indexOf('\\');
303        if (pos == -1) {
304            return fileEntry;
305        }
306
307        // We have escaped characters
308        byte[] inba = fileEntry.getBytes();
309        int inSub = 0; // Input subscript into fileEntry
310        byte[] outba = new byte[fileEntry.length()];
311        int outSub = 0; // Output subscript into outba
312
313        while (true) {
314            System.arraycopy(inba, inSub, outba, outSub, pos - inSub);
315            outSub += pos - inSub;
316            inSub = pos + 1;
317            switch ((char) inba[inSub++]) {
318                case '"':
319                    outba[outSub++] = '"';
320                    break;
321
322                case 'a':
323                    outba[outSub++] = 7; // Bell
324                    break;
325
326                case 'b':
327                    outba[outSub++] = '\b';
328                    break;
329
330                case 't':
331                    outba[outSub++] = '\t';
332                    break;
333
334                case 'n':
335                    outba[outSub++] = '\n';
336                    break;
337
338                case 'v':
339                    outba[outSub++] = 11; // Vertical tab
340                    break;
341
342                case 'f':
343                    outba[outSub++] = '\f';
344                    break;
345
346                case 'r':
347                    outba[outSub++] = '\f';
348                    break;
349
350                case '\\':
351                    outba[outSub++] = '\\';
352                    break;
353
354                case '0':
355                case '1':
356                case '2':
357                case '3':
358                    // This assumes that the octal escape here is valid.
359                    byte b = (byte) ((inba[inSub - 1] - '0') << 6);
360                    b |= (byte) ((inba[inSub++] - '0') << 3);
361                    b |= (byte) (inba[inSub++] - '0');
362                    outba[outSub++] = b;
363                    break;
364
365                default:
366                    // This is an invalid escape in a string.  Just copy it.
367                    outba[outSub++] = '\\';
368                    inSub--;
369                    break;
370            }
371            pos = fileEntry.indexOf('\\', inSub);
372            if (pos == -1) // No more backslashes; we're done
373            {
374                System.arraycopy(inba, inSub, outba, outSub, inba.length - inSub);
375                outSub += inba.length - inSub;
376                break;
377            }
378        }
379        try {
380            // explicit say UTF-8, otherwise it'll fail at least on Windows cmdline
381            return new String(outba, 0, outSub, "UTF-8");
382        } catch (UnsupportedEncodingException e) {
383            throw new RuntimeException(e);
384        }
385    }
386}