|
| 1 | +import subprocess |
| 2 | +import json |
| 3 | +import csv |
| 4 | +import sys |
| 5 | +import os |
| 6 | + |
| 7 | +""" |
| 8 | +This script collects CodeQL queries that are part of code scanning query packs |
| 9 | +and prints CSV data to stdout that describes which packs contain which queries. |
| 10 | +
|
| 11 | +Errors are printed to stderr. This script requires that 'git' and 'codeql' commands |
| 12 | +are on the PATH. It'll try to automatically set the CodeQL search path correctly, |
| 13 | +as long as you run the script from one of the following locations: |
| 14 | + - anywhere from within a clone of the CodeQL Git repo |
| 15 | + - from the parent directory of a clone of the CodeQL Git repo (assuming 'codeql' |
| 16 | + and 'codeql-go' directories both exist) |
| 17 | +""" |
| 18 | + |
# Languages and query packs (suites) that the report covers. Every language/pack
# combination is resolved as the query suite file '<language>-<pack>.qls'.
languages = [
    "cpp",
    "csharp",
    "go",
    "java",
    "javascript",
    "python",
]
packs = [
    "code-scanning",
    "security-and-quality",
    "security-extended",
]
| 22 | + |
| 23 | + |
def prefix_repo_nwo(filename):
    """
    Replaces an absolute path prefix with a GitHub repository name with owner (NWO).
    This function relies on `git` being available.

    For example:
        /home/alice/git/ql/java/ql/src/MyQuery.ql
    becomes:
        github/codeql/java/ql/src/MyQuery.ql

    If we can't detect a known NWO (e.g. github/codeql, github/codeql-go), the
    path will be truncated to the root of the git repo:
        ql/java/ql/src/MyQuery.ql

    If the filename is not part of a Git repo (or `git` can't be run for its
    directory), the return value is the same as the input value: the whole path.
    """
    dirname = os.path.dirname(filename)

    try:
        git_toplevel_dir_subp = subprocess_run(["git", "-C", dirname, "rev-parse", "--show-toplevel"])
    except (subprocess.CalledProcessError, OSError):
        # Not a Git repo (non-zero exit code), or git could not be started at all.
        # Only these are caught, so that e.g. Ctrl-C still interrupts the script.
        return filename

    git_toplevel_dir = git_toplevel_dir_subp.stdout.strip()

    # Detect 'github/codeql' and 'github/codeql-go' repositories by checking the remote (it's a bit
    # of a hack but will work in most cases, as long as the remotes have 'codeql' and 'codeql-go'
    # in the URL). 'codeql-go' has to be tested first because it also contains 'codeql'.
    git_remotes = subprocess_run(["git", "-C", dirname, "remote", "-v"]).stdout.strip()

    if "codeql-go" in git_remotes:
        prefix = "github/codeql-go"
    elif "codeql" in git_remotes:
        prefix = "github/codeql"
    else:
        prefix = os.path.basename(git_toplevel_dir)

    # Compute the path relative to the repo root from resolved paths instead of slicing
    # `filename` by the length of the toplevel string: git may spell the toplevel
    # differently than `filename` does (relative path, symlinked checkout), in which
    # case slicing would cut the path at an arbitrary position.
    resolved_file = os.path.join(os.path.realpath(dirname), os.path.basename(filename))
    path_in_repo = os.path.relpath(resolved_file, os.path.realpath(git_toplevel_dir))

    return os.path.join(prefix, path_in_repo)
| 61 | + |
| 62 | + |
def single_spaces(input):
    """
    Collapses every run of whitespace (spaces, tabs, newlines) in the input string
    into a single space and drops leading and trailing whitespace.

    Workaround for https://github.com/github/codeql-coreql-team/issues/470 which causes
    some metadata strings to contain newlines and spaces without a good reason.
    """
    words = input.split()
    return " ".join(words)
| 69 | + |
| 70 | + |
def get_query_metadata(key, metadata, queryfile):
    """
    Looks up `key` in the `metadata` mapping of a query.

    Returns the value with its whitespace normalized by `single_spaces`. If the key
    is missing, prints a warning to stderr (mentioning the query ID, or 'unknown' when
    there is no 'id' entry either, and `queryfile`) and returns an empty string.
    """
    if key not in metadata:
        query_id = metadata.get('id', 'unknown')
        warning = "Warning: no '%s' metadata for query with ID '%s' (%s)" % (key, query_id, queryfile)
        print(warning, file=sys.stderr)
        return ""

    return single_spaces(metadata[key])
| 77 | + |
| 78 | + |
def subprocess_run(cmd):
    """
    Runs a command (given as an argument list) through subprocess.run.

    stdout and stderr are captured as text, the child process gets a copy of the
    current environment, and an exception is raised if the exit code != 0.
    Returns the resulting CompletedProcess.
    """
    run_options = {
        "capture_output": True,
        "text": True,
        "env": os.environ.copy(),
        "check": True,
    }
    return subprocess.run(cmd, **run_options)
| 82 | + |
| 83 | + |
| 84 | + |
# Fail early if one of the required command-line tools can't be invoked: print an
# error to stderr, then re-raise the original exception so the script stops.
for version_cmd, error_message in (
    (["git","--version"], "Error: couldn't invoke 'git'. Is it on the path? Aborting."),
    (["codeql","--version"], "Error: couldn't invoke CodeQL CLI 'codeql'. Is it on the path? Aborting."),
):
    try:
        subprocess_run(version_cmd)
    except Exception as e:
        print(error_message, file=sys.stderr)
        raise e
| 96 | + |
# Define CodeQL search path so it'll find the CodeQL repositories:
#  - anywhere in the current Git clone (including current working directory)
#  - the 'codeql' subdirectory of the cwd
#
# (and assumes the codeql-go repo is in a similar location)
codeql_search_path = "./codeql:./codeql-go:."  # will be extended further down

# Extend CodeQL search path by detecting root of the current Git repo (if any). This means that you
# can run this script from any location within the CodeQL git repository.
try:
    git_toplevel_dir = subprocess_run(["git", "rev-parse", "--show-toplevel"]).stdout.strip()
except (subprocess.CalledProcessError, OSError):
    # 'git rev-parse --show-toplevel' exited with a non-zero exit code (or could not be
    # started): we're not in a Git repo, so the default search path is kept. Only these
    # exceptions are caught, so that e.g. Ctrl-C is not silently swallowed here.
    pass
else:
    # Current working directory is in a Git repo. Add its root to the search path, just in
    # case it's the CodeQL repo, together with a sibling 'codeql-go' directory.
    codeql_search_path += ":" + git_toplevel_dir + ":" + git_toplevel_dir + "/../codeql-go"
| 115 | + |
# Set up the CSV writer on stdout and emit the header row first.
# Python's CSV writer quotes fields automatically where necessary.
csv_header = [
    "Query filename",
    "Suite",
    "Query name",
    "Query ID",
    "Kind",
    "Severity",
    "Precision",
    "Tags",
]
csvwriter = csv.writer(sys.stdout)
csvwriter.writerow(csv_header)
| 122 | + |
# Iterate over all languages and packs, and resolve which queries are part of those packs.
# One CSV row is written to stdout for every (query, pack) combination.
for lang in languages:
    for pack in packs:
        # Get absolute paths to queries in this pack by using 'codeql resolve queries'
        try:
            queries_subp = subprocess_run([
                "codeql", "resolve", "queries",
                "--search-path", codeql_search_path,
                "%s-%s.qls" % (lang, pack),
            ])
        except Exception:
            # Resolving queries might go wrong if the github/codeql and github/codeql-go repositories are not
            # on the search path. Warn and carry on with the next pack instead of aborting the whole run.
            print(
                "Warning: couldn't find query pack '%s' for language '%s'. Do you have the right repositories in the right places (search path: '%s')?" % (pack, lang, codeql_search_path),
                file=sys.stderr
            )
            continue

        # Investigate metadata for every query by using 'codeql resolve metadata'.
        # splitlines() returns an empty list for empty output (a suite that resolves to no
        # queries) and copes with '\r\n' line endings; splitting on "\n" would yield one
        # empty "filename" in that case and make the metadata lookup below fail.
        for output_line in queries_subp.stdout.splitlines():
            queryfile = output_line.strip()
            if not queryfile:
                continue

            query_metadata_json = subprocess_run(["codeql", "resolve", "metadata", queryfile]).stdout.strip()

            # Turn an absolute path to a query file into an nwo-prefixed path (e.g. github/codeql/java/ql/src/....)
            queryfile_nwo = prefix_repo_nwo(queryfile)

            meta = json.loads(query_metadata_json)

            # Python's CSV writer will automatically quote fields if necessary
            csvwriter.writerow([
                queryfile_nwo, pack,
                get_query_metadata('name', meta, queryfile_nwo),
                get_query_metadata('id', meta, queryfile_nwo),
                get_query_metadata('kind', meta, queryfile_nwo),
                get_query_metadata('problem.severity', meta, queryfile_nwo),
                get_query_metadata('precision', meta, queryfile_nwo),
                get_query_metadata('tags', meta, queryfile_nwo)
            ])
| 157 | + |
0 commit comments