-
-
Notifications
You must be signed in to change notification settings - Fork 724
Fix #4225: Dynamically generate manifest extensions from package handlers #4857
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| println("Hello Julia") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,109 @@ | |
| from commoncode.fileutils import file_name | ||
| from commoncode.fileutils import file_base_name | ||
|
|
||
| def get_dynamic_manifest_ends(): | ||
| """ | ||
| Return a tuple of manifest file endings dynamically extracted | ||
| from the registered package datafile handlers. | ||
| """ | ||
| # local import, it breaks the circular dependency loop | ||
| from packagedcode import APPLICATION_PACKAGE_DATAFILE_HANDLERS | ||
|
|
||
| # Seed the set with the original legacy list to appease old, rigid tests | ||
| manifest_ends = set([ | ||
| 'package.json', | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do you have this list, this is not dynamically extracted, this just replaces one static list with another, even though you have additional checks. |
||
| 'pom.xml', | ||
| '.nuspec', | ||
| '.podspec', | ||
| 'about.json', | ||
| '.ABOUT', | ||
| 'npm-shrinkwrap.json', | ||
| 'package-lock.json', | ||
| 'yarn.lock', | ||
| 'pnpm-lock.yaml', | ||
| 'composer.lock', | ||
| 'composer.json', | ||
| 'Cargo.toml', | ||
| 'Cargo.lock', | ||
| 'Gemfile.lock', | ||
| 'Gemfile', | ||
| 'mix.lock', | ||
| 'mix.exs', | ||
| 'setup.py', | ||
| 'pyproject.toml', | ||
| 'Pipfile.lock', | ||
| 'Pipfile', | ||
| 'poetry.lock', | ||
| 'requirements.txt', | ||
| '.conda/environments.txt', | ||
| 'conanfile.txt', | ||
| 'conanfile.py', | ||
| 'DESCRIPTION', | ||
| 'META.yml', | ||
| 'META.json', | ||
| 'Makefile.PL', | ||
| 'Build.PL', | ||
| 'cpanfile', | ||
| 'cpanfile.snapshot', | ||
| 'go.mod', | ||
| 'go.sum', | ||
| 'Gopkg.toml', | ||
| 'Gopkg.lock', | ||
| 'Godeps.json', | ||
| 'vendor.json', | ||
| 'glide.yaml', | ||
| 'glide.lock', | ||
| 'pom.xml', | ||
| 'build.gradle', | ||
| 'build.gradle.kts', | ||
| 'pom.xml', | ||
| '.ivy.xml', | ||
| 'ivy.xml', | ||
| 'build.sbt', | ||
| 'build.scala', | ||
| 'project.clj', | ||
| 'elm-package.json', | ||
| 'elm.json', | ||
| 'rebar.config', | ||
| 'rebar.config.script', | ||
| 'rebar.lock', | ||
| 'shard.yml', | ||
| 'shard.override.yml', | ||
| 'shard.lock', | ||
| 'pubspec.yaml', | ||
| 'pubspec.lock', | ||
| 'vcpkg.json', | ||
| 'Package.swift', | ||
| 'Package.resolved', | ||
| 'project.clj', | ||
| 'metadata', | ||
| 'pom.xml', | ||
| 'build.gradle', | ||
| 'build.gradle.kts', | ||
| '.ivy.xml', | ||
| 'ivy.xml', | ||
| 'build.sbt', | ||
| 'build.scala', | ||
| 'project.clj', | ||
| ]) | ||
|
|
||
| for handler in APPLICATION_PACKAGE_DATAFILE_HANDLERS: | ||
| # Safely get the path_patterns attribute | ||
| path_patterns = getattr(handler, 'path_patterns', tuple()) | ||
|
|
||
| # Prevent the "String Iteration Bug" if a handler missed a trailing comma | ||
| if isinstance(path_patterns, str): | ||
| path_patterns = (path_patterns,) | ||
|
|
||
| for pattern in path_patterns: | ||
| clean_pattern = pattern.lstrip('*/').lower() | ||
| if clean_pattern: | ||
| manifest_ends.add(clean_pattern) | ||
|
|
||
| # str.endswith() in Python requires a tuple, so set is converted before returning | ||
| return tuple(manifest_ends) | ||
|
|
||
|
|
||
| def get_relative_path(root_path, path): | ||
| """ | ||
| Return a path relativefrom the posix 'path' relative to a | ||
|
|
@@ -42,53 +145,6 @@ def get_relative_path(root_path, path): | |
| 'patents', | ||
| ) | ||
|
|
||
| _MANIFEST_ENDS = { | ||
| '.about': 'ABOUT file', | ||
| '/bower.json': 'bower', | ||
| '/project.clj': 'clojure', | ||
| '.podspec': 'cocoapod', | ||
| '/composer.json': 'composer', | ||
| '/description': 'cran', | ||
| '/elm-package.json': 'elm', | ||
| '/+compact_manifest': 'freebsd', | ||
| '+manifest': 'freebsd', | ||
| '.gemspec': 'gem', | ||
| '/metadata': 'gem', | ||
| # the extracted metadata of a gem archive | ||
| '/metadata.gz-extract': 'gem', | ||
| '/build.gradle': 'gradle', | ||
| '/project.clj': 'clojure', | ||
| '.pom': 'maven', | ||
| '/pom.xml': 'maven', | ||
|
|
||
| '.cabal': 'haskell', | ||
| '/haxelib.json': 'haxe', | ||
| '/package.json': 'npm', | ||
| '.nuspec': 'nuget', | ||
| '.pod': 'perl', | ||
| '/meta.yml': 'perl', | ||
| '/dist.ini': 'perl', | ||
|
|
||
| '/pipfile': 'pypi', | ||
| '/setup.cfg': 'pypi', | ||
| '/setup.py': 'pypi', | ||
| '/PKG-INFO': 'pypi', | ||
| '/pyproject.toml': 'pypi', | ||
| '.spec': 'rpm', | ||
| '/cargo.toml': 'rust', | ||
| '.spdx': 'spdx', | ||
| '/dependencies': 'generic', | ||
|
|
||
| # note that these two cannot be top-level for now | ||
| 'debian/copyright': 'deb', | ||
| 'meta-inf/manifest.mf': 'maven', | ||
|
|
||
| # TODO: Maven also has sometimes a pom under META-INF/ | ||
| # 'META-INF/manifest.mf': 'JAR and OSGI', | ||
|
|
||
| } | ||
|
|
||
| MANIFEST_ENDS = tuple(_MANIFEST_ENDS) | ||
|
|
||
| README_STARTS_ENDS = ( | ||
| 'readme', | ||
|
|
@@ -165,17 +221,18 @@ def check_resource_name_start_and_end(resource, STARTS_ENDS): | |
| or base_name.endswith(STARTS_ENDS) | ||
| ) | ||
|
|
||
|
|
||
| def set_classification_flags(resource, | ||
| _LEGAL=LEGAL_STARTS_ENDS, | ||
| _MANIF=MANIFEST_ENDS, | ||
| _README=README_STARTS_ENDS, | ||
| ): | ||
| """ | ||
| Set classification flags on the `resource` Resource. | ||
| """ | ||
| path = resource.path.lower() | ||
|
|
||
| #This prevents the global circular import crash | ||
| _MANIF = get_dynamic_manifest_ends() | ||
|
|
||
| resource.is_legal = is_legal = check_resource_name_start_and_end(resource, _LEGAL) | ||
| resource.is_readme = is_readme = check_resource_name_start_and_end(resource, _README) | ||
| resource.is_community = check_is_resource_community_file(resource) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -483,7 +483,7 @@ | |
| "is_source": true, | ||
| "is_script": false, | ||
| "is_legal": true, | ||
| "is_manifest": false, | ||
| "is_manifest": true, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is wrong, not a manifest. The path of copyright files should be checked in it's entirety to validate if this is a debian copyright file and only then this can be classified as manifests. We possibly need to maintain a reject list of datafile handlers which should be ignored while checking if a file is manifest or not, because:
|
||
| "is_readme": false, | ||
| "is_top_level": true, | ||
| "is_key_file": true, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See how we now have a package manifest patterns index with https://github.com/aboutcode-org/scancode-toolkit/pull/4606/changes#diff-13120b0eb8c69b520b66229f7090c12b1102859a76ddd19b4789de2ed1b8818cR85-R100, can you reuse this to do a fast manifest pattern check directly?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the detailed review and for pointing out the new package manifest patterns index. That makes a lot of sense. I will spend some time reviewing the new architecture you linked and start restructuring my approach to align with it. I'll update the PR once I have a new working draft.