View Javadoc
1   /*
2    * This file is part of dependency-check-core.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * Copyright (c) 2015 Institute for Defense Analyses. All Rights Reserved.
17   */
18  package org.owasp.dependencycheck.analyzer;
19  
import com.github.packageurl.MalformedPackageURLException;
import com.github.packageurl.PackageURL;
import com.github.packageurl.PackageURLBuilder;
import org.apache.commons.io.filefilter.NameFileFilter;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.owasp.dependencycheck.Engine;
import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
import org.owasp.dependencycheck.data.nvd.ecosystem.Ecosystem;
import org.owasp.dependencycheck.dependency.Confidence;
import org.owasp.dependencycheck.dependency.Dependency;
import org.owasp.dependencycheck.dependency.EvidenceType;
import org.owasp.dependencycheck.dependency.naming.GenericIdentifier;
import org.owasp.dependencycheck.dependency.naming.PurlIdentifier;
import org.owasp.dependencycheck.exception.InitializationException;
import org.owasp.dependencycheck.utils.FileFilterBuilder;
import org.owasp.dependencycheck.utils.Settings;
import org.owasp.dependencycheck.utils.UrlStringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.concurrent.ThreadSafe;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
48  
49  /**
50   * Used to analyze a Python package, and collect information that can be used to
51   * determine the associated CPE.
52   *
53   * @author Dale Visser
54   */
55  @Experimental
56  @ThreadSafe
57  public class PythonPackageAnalyzer extends AbstractFileTypeAnalyzer {
58  
59      /**
60       * The logger.
61       */
62      private static final Logger LOGGER = LoggerFactory.getLogger(PythonPackageAnalyzer.class);
63  
64      /**
65       * A descriptor for the type of dependencies processed or added by this
66       * analyzer.
67       */
68      public static final String DEPENDENCY_ECOSYSTEM = Ecosystem.PYTHON;
69  
70      /**
71       * Used when compiling file scanning regex patterns.
72       */
73      private static final int REGEX_OPTIONS = Pattern.DOTALL | Pattern.CASE_INSENSITIVE;
74  
75      /**
76       * Filename extensions for files to be analyzed.
77       */
78      private static final String EXTENSIONS = "py";
79  
80      /**
81       * Pattern for matching the module doc string in a source file.
82       */
83      private static final Pattern MODULE_DOCSTRING = Pattern.compile("^(['\\\"]{3})(.*?)\\1", REGEX_OPTIONS);
84  
85      /**
86       * Matches assignments to version variables in Python source code.
87       */
88      private static final Pattern VERSION_PATTERN = Pattern.compile("\\b(__)?version(__)? *= *(['\"]+)(\\d+\\.\\d+.*?)\\3",
89              REGEX_OPTIONS);
90  
91      /**
92       * Matches assignments to title variables in Python source code.
93       */
94      private static final Pattern TITLE_PATTERN = compileAssignPattern("title");
95  
96      /**
97       * Matches assignments to summary variables in Python source code.
98       */
99      private static final Pattern SUMMARY_PATTERN = compileAssignPattern("summary");
100 
101     /**
102      * Matches assignments to URL/URL variables in Python source code.
103      */
104     private static final Pattern URI_PATTERN = compileAssignPattern("ur[il]");
105 
106     /**
107      * Matches assignments to home page variables in Python source code.
108      */
109     private static final Pattern HOMEPAGE_PATTERN = compileAssignPattern("home_?page");
110 
111     /**
112      * Matches assignments to author variables in Python source code.
113      */
114     private static final Pattern AUTHOR_PATTERN = compileAssignPattern("author");
115 
116     /**
117      * Filter that detects files named "__init__.py".
118      */
119     private static final FileFilter INIT_PY_FILTER = new NameFileFilter("__init__.py");
120 
121     /**
122      * The file filter for python files.
123      */
124     private static final FileFilter PY_FILTER = new SuffixFileFilter(".py");
125 
126     /**
127      * The file filter used to determine which files this analyzer supports.
128      */
129     private static final FileFilter FILTER = FileFilterBuilder.newInstance().addExtensions(EXTENSIONS).build();
130 
131     /**
132      * Returns the name of the Python Package Analyzer.
133      *
134      * @return the name of the analyzer
135      */
136     @Override
137     public String getName() {
138         return "Python Package Analyzer";
139     }
140 
141     /**
142      * Tell that we are used for information collection.
143      *
144      * @return INFORMATION_COLLECTION
145      */
146     @Override
147     public AnalysisPhase getAnalysisPhase() {
148         return AnalysisPhase.INFORMATION_COLLECTION;
149     }
150 
151     /**
152      * Returns the key name for the analyzers enabled setting.
153      *
154      * @return the key name for the analyzers enabled setting
155      */
156     @Override
157     protected String getAnalyzerEnabledSettingKey() {
158         return Settings.KEYS.ANALYZER_PYTHON_PACKAGE_ENABLED;
159     }
160 
161     /**
162      * Returns the FileFilter
163      *
164      * @return the FileFilter
165      */
166     @Override
167     protected FileFilter getFileFilter() {
168         return FILTER;
169     }
170 
171     /**
172      * No-op initializer implementation.
173      *
174      * @param engine a reference to the dependency-check engine
175      * @throws InitializationException never thrown
176      */
177     @Override
178     protected void prepareFileTypeAnalyzer(Engine engine) throws InitializationException {
179         // Nothing to do here.
180     }
181 
182     /**
183      * Utility function to create a regex pattern matcher.
184      *
185      * @param name the value to use when constructing the assignment pattern
186      * @return the compiled Pattern
187      */
188     private static Pattern compileAssignPattern(String name) {
189         return Pattern.compile(
190                 String.format("\\b(__)?%s(__)?\\b *= *(['\"]+)(.*?)\\3", name),
191                 REGEX_OPTIONS);
192     }
193 
194     /**
195      * Analyzes python packages and adds evidence to the dependency.
196      *
197      * @param dependency the dependency being analyzed
198      * @param engine the engine being used to perform the scan
199      * @throws AnalysisException thrown if there is an unrecoverable error
200      * analyzing the dependency
201      */
202     @Override
203     protected void analyzeDependency(Dependency dependency, Engine engine)
204             throws AnalysisException {
205         dependency.setEcosystem(DEPENDENCY_ECOSYSTEM);
206         final File file = dependency.getActualFile();
207         final File parent = file.getParentFile();
208         final String parentName = parent.getName();
209         if (INIT_PY_FILTER.accept(file)) {
210             //by definition, the containing folder of __init__.py is considered the package, even the file is empty:
211             //"The __init__.py files are required to make Python treat the directories as containing packages"
212             //see section "6.4 Packages" from https://docs.python.org/2/tutorial/modules.html;
213             dependency.addEvidence(EvidenceType.PRODUCT, file.getName(), "PackageName", parentName, Confidence.HIGHEST);
214             dependency.addEvidence(EvidenceType.VENDOR, file.getName(), "PackageName", parentName, Confidence.MEDIUM);
215             dependency.setName(parentName);
216 
217             final File[] fileList = parent.listFiles(PY_FILTER);
218             if (fileList != null) {
219                 for (final File sourceFile : fileList) {
220                     analyzeFileContents(dependency, sourceFile);
221                 }
222             }
223         } else {
224             engine.removeDependency(dependency);
225         }
226     }
227 
228     /**
229      * This should gather information from leading docstrings, file comments,
230      * and assignments to __version__, __title__, __summary__, __uri__, __url__,
231      * __home*page__, __author__, and their all caps equivalents.
232      *
233      * @param dependency the dependency being analyzed
234      * @param file the file name to analyze
235      * @throws AnalysisException thrown if there is an unrecoverable error
236      */
237     private void analyzeFileContents(Dependency dependency, File file)
238             throws AnalysisException {
239         final String contents;
240         try {
241             contents = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8).trim();
242         } catch (IOException e) {
243             throw new AnalysisException("Problem occurred while reading dependency file.", e);
244         }
245         if (!contents.isEmpty()) {
246             final String source = file.getName();
247             gatherEvidence(dependency, EvidenceType.VERSION, VERSION_PATTERN, contents,
248                     source, "SourceVersion", Confidence.MEDIUM);
249             addSummaryInfo(dependency, SUMMARY_PATTERN, 4, contents,
250                     source, "summary");
251             if (INIT_PY_FILTER.accept(file)) {
252                 addSummaryInfo(dependency, MODULE_DOCSTRING, 2,
253                         contents, source, "docstring");
254             }
255             gatherEvidence(dependency, EvidenceType.PRODUCT, TITLE_PATTERN, contents,
256                     source, "SourceTitle", Confidence.LOW);
257 
258             gatherEvidence(dependency, EvidenceType.VENDOR, AUTHOR_PATTERN, contents,
259                     source, "SourceAuthor", Confidence.MEDIUM);
260             gatherHomePageEvidence(dependency, EvidenceType.VENDOR, URI_PATTERN,
261                     source, "URL", contents);
262             gatherHomePageEvidence(dependency, EvidenceType.VENDOR, HOMEPAGE_PATTERN,
263                     source, "HomePage", contents);
264 
265             try {
266                 final PackageURLBuilder builder = PackageURLBuilder.aPackageURL().withType("pypi").withName(dependency.getName());
267                 if (dependency.getVersion() != null) {
268                     builder.withVersion(dependency.getVersion());
269                 }
270                 final PackageURL purl = builder.build();
271                 dependency.addSoftwareIdentifier(new PurlIdentifier(purl, Confidence.HIGHEST));
272             } catch (MalformedPackageURLException ex) {
273                 LOGGER.debug("Unable to build package url for python", ex);
274                 final GenericIdentifier id;
275                 if (dependency.getVersion() != null) {
276                     id = new GenericIdentifier("generic:" + dependency.getName() + "@" + dependency.getVersion(), Confidence.HIGHEST);
277                 } else {
278                     id = new GenericIdentifier("generic:" + dependency.getName(), Confidence.HIGHEST);
279                 }
280                 dependency.addSoftwareIdentifier(id);
281             }
282         }
283     }
284 
285     /**
286      * Adds summary information to the dependency
287      *
288      * @param dependency the dependency being analyzed
289      * @param pattern the pattern used to perform analysis
290      * @param group the group from the pattern that indicates the data to use
291      * @param contents the data being analyzed
292      * @param source the source name to use when recording the evidence
293      * @param key the key name to use when recording the evidence
294      */
295     private void addSummaryInfo(Dependency dependency, Pattern pattern,
296                                 int group, String contents, String source, String key) {
297         final Matcher matcher = pattern.matcher(contents);
298         final boolean found = matcher.find();
299         if (found) {
300             JarAnalyzer.addDescription(dependency, matcher.group(group),
301                     source, key);
302         }
303     }
304 
305     /**
306      * Collects evidence from the home page URL.
307      *
308      * @param dependency the dependency that is being analyzed
309      * @param type the type of evidence
310      * @param pattern the pattern to match
311      * @param source the source of the evidence
312      * @param name the name of the evidence
313      * @param contents the home page URL
314      */
315     private void gatherHomePageEvidence(Dependency dependency, EvidenceType type, Pattern pattern,
316                                         String source, String name, String contents) {
317         final Matcher matcher = pattern.matcher(contents);
318         if (matcher.find()) {
319             final String url = matcher.group(4);
320             if (UrlStringUtils.isUrl(url)) {
321                 dependency.addEvidence(type, source, name, url, Confidence.MEDIUM);
322             }
323         }
324     }
325 
326     /**
327      * Gather evidence from a Python source file using the given string
328      * assignment regex pattern.
329      *
330      * @param dependency the dependency that is being analyzed
331      * @param type the type of evidence
332      * @param pattern to scan contents with
333      * @param contents of Python source file
334      * @param source for storing evidence
335      * @param name of evidence
336      * @param confidence in evidence
337      */
338     private void gatherEvidence(Dependency dependency, EvidenceType type, Pattern pattern, String contents,
339                                 String source, String name, Confidence confidence) {
340         final Matcher matcher = pattern.matcher(contents);
341         final boolean found = matcher.find();
342         if (found) {
343             dependency.addEvidence(type, source, name, matcher.group(4), confidence);
344             if (type == EvidenceType.VERSION) {
345                 //TODO - this seems broken as we are cycling over py files and could be grabbing versions from multiple?
346                 dependency.setVersion(matcher.group(4));
347                 final String dispName = String.format("%s:%s", dependency.getName(), dependency.getVersion());
348                 dependency.setDisplayFileName(dispName);
349             }
350         }
351     }
352 }