From 53cda350a90e7ed904fc5af4d6104ef90463156b Mon Sep 17 00:00:00 2001 From: Fokko Date: Wed, 5 Feb 2025 20:39:04 +0100 Subject: [PATCH] MINOR: Remove release script Including the mention in the `LICENSE` and the `README.md` that still refers to Jira. --- LICENSE | 10 - dev/README.md | 93 ---------- dev/merge_parquet_pr.py | 393 ---------------------------------------- 3 files changed, 496 deletions(-) delete mode 100644 dev/README.md delete mode 100755 dev/merge_parquet_pr.py diff --git a/LICENSE b/LICENSE index b0065815a5..2c96440ccc 100644 --- a/LICENSE +++ b/LICENSE @@ -197,16 +197,6 @@ License: Apache License Version 2.0 http://www.apache.org/licenses/LICENSE-2.0 -------------------------------------------------------------------------------- -This product includes code from Apache Spark. - -* dev/merge_parquet_pr.py is based on Spark's dev/merge_spark_pr.py - -Copyright: 2014 The Apache Software Foundation. -Home page: https://spark.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - This product includes code from Twitter's ElephantBird project. * parquet-hadoop's UnmaterializableRecordCounter.java includes code from diff --git a/dev/README.md b/dev/README.md deleted file mode 100644 index 68fe99c56a..0000000000 --- a/dev/README.md +++ /dev/null @@ -1,93 +0,0 @@ - - -# Parquet Developer Scripts -This directory contains scripts useful to developers when packaging, -testing, or committing to Parquet. - -Merging a pull request requires being a committer on the project. - -* How to merge a Pull request: -have an apache and apache-github remote setup -``` -git remote add apache-github https://github.com/apache/parquet-java.git -git remote add apache https://gitbox.apache.org/repos/asf?p=parquet-java.git -``` -run the following command -``` -dev/merge_parquet_pr.py -``` - -Note: -* The parent directory of your parquet repository must be called parquet-java -* Without jira-python installed you'll have to close the JIRA manually - -example output: -``` -Which pull request would you like to merge? (e.g. 34): -``` -Type the pull request number (from https://github.com/apache/parquet-java/pulls) and hit enter. -``` -=== Pull Request #X === -title Blah Blah Blah -source repo/branch -target master -url https://api.github.com/repos/apache/parquet-java/pulls/X - -Proceed with merging pull request #3? (y/n): -``` -If this looks good, type y and hit enter. -``` -From git-wip-us.apache.org:/repos/asf/parquet-java.git - * [new branch] master -> PR_TOOL_MERGE_PR_3_MASTER -Switched to branch 'PR_TOOL_MERGE_PR_3_MASTER' - -Merge complete (local ref PR_TOOL_MERGE_PR_3_MASTER). Push to apache? (y/n): -``` -A local branch with the merge has been created. -type y and hit enter to push it to apache master -``` -Counting objects: 67, done. -Delta compression using up to 4 threads. -Compressing objects: 100% (26/26), done. -Writing objects: 100% (36/36), 5.32 KiB, done. -Total 36 (delta 17), reused 0 (delta 0) -To git-wip-us.apache.org:/repos/asf/parquet-java.git - b767ac4..485658a PR_TOOL_MERGE_PR_X_MASTER -> master -Restoring head pointer to b767ac4e -Note: checking out 'b767ac4e'. - -You are in 'detached HEAD' state. You can look around, make experimental -changes and commit them, and you can discard any commits you make in this -state without impacting any branches by performing another checkout. - -If you want to create a new branch to retain commits you create, you may -do so (now or later) by using -b with the checkout command again. Example: - - git checkout -b new_branch_name - -HEAD is now at b767ac4... Update README.md -Deleting local branch PR_TOOL_MERGE_PR_X -Deleting local branch PR_TOOL_MERGE_PR_X_MASTER -Pull request #X merged! -Merge hash: 485658a5 - -Would you like to pick 485658a5 into another branch? (y/n): -``` -For now just say n as we have 1 branch diff --git a/dev/merge_parquet_pr.py b/dev/merge_parquet_pr.py deleted file mode 100755 index c670924721..0000000000 --- a/dev/merge_parquet_pr.py +++ /dev/null @@ -1,393 +0,0 @@ -#!/usr/bin/env python - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Utility for creating well-formed pull request merges and pushing them to Apache. -# usage: ./apache-pr-merge.py (see config env vars below) -# -# This utility assumes you already have local a Parquet git folder and that you -# have added remotes corresponding to both (i) the github apache Parquet -# mirror and (ii) the apache git repo. - -import json -import os -import re -import subprocess -import sys -import tempfile -import urllib2 -import getpass - -try: - import jira.client - JIRA_IMPORTED = True -except ImportError: - JIRA_IMPORTED = False - -# Location of your Parquet git development area -PARQUET_HOME = os.path.abspath(__file__).rsplit("/", 2)[0] -PROJECT_NAME = PARQUET_HOME.rsplit("/", 1)[1] -print "PARQUET_HOME = " + PARQUET_HOME -print "PROJECT_NAME = " + PROJECT_NAME - -def lines_from_cmd(cmd): - return subprocess.check_output(cmd.split(" ")).strip().split("\n") - -# Remote name which points to the GitHub site -PR_REMOTE_NAME = os.environ.get("PR_REMOTE_NAME") -available_remotes = lines_from_cmd("git remote") -if PR_REMOTE_NAME is not None: - if PR_REMOTE_NAME not in available_remotes: - print "ERROR: git remote '%s' is not defined." % PR_REMOTE_NAME - sys.exit(-1) -else: - remote_candidates = ["github-apache", "apache-github"] - # Get first available remote from the list of candidates - PR_REMOTE_NAME = next((remote for remote in available_remotes if remote in remote_candidates), None) - -# Remote name which points to Apache git -PUSH_REMOTE_NAME = os.environ.get("PUSH_REMOTE_NAME", "apache") -# ASF JIRA username -JIRA_USERNAME = os.environ.get("JIRA_USERNAME") -# ASF JIRA password -JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD") - -GITHUB_BASE = "https://github.com/apache/" + PROJECT_NAME + "/pull" -GITHUB_API_BASE = "https://api.github.com/repos/apache/" + PROJECT_NAME -JIRA_BASE = "https://issues.apache.org/jira/browse" -JIRA_API_BASE = "https://issues.apache.org/jira" -# Prefix added to temporary branches -BRANCH_PREFIX = "PR_TOOL" - -os.chdir(PARQUET_HOME) - - -def get_json(url): - try: - return json.load(urllib2.urlopen(url)) - except urllib2.HTTPError as e: - print "Unable to fetch URL, exiting: %s" % url - sys.exit(-1) - - -def fail(msg): - print msg - clean_up() - sys.exit(-1) - - -def run_cmd(cmd): - try: - if isinstance(cmd, list): - return subprocess.check_output(cmd) - else: - return subprocess.check_output(cmd.split(" ")) - except subprocess.CalledProcessError as e: - # this avoids hiding the stdout / stderr of failed processes - print 'Command failed: %s' % cmd - print 'With output:' - print '--------------' - print e.output - print '--------------' - raise e - -def continue_maybe(prompt): - result = raw_input("\n%s (y/n): " % prompt) - if result.lower() != "y": - fail("Okay, exiting") - - -original_head = run_cmd("git rev-parse HEAD")[:8] - - -def clean_up(): - print "Restoring head pointer to %s" % original_head - run_cmd("git checkout %s" % original_head) - - branches = run_cmd("git branch").replace(" ", "").split("\n") - - for branch in filter(lambda x: x.startswith(BRANCH_PREFIX), branches): - print "Deleting local branch %s" % branch - run_cmd("git branch -D %s" % branch) - - -# merge the requested PR and return the merge hash -def merge_pr(pr_num, target_ref): - pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num) - target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX, pr_num, target_ref.upper()) - run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, pr_branch_name)) - run_cmd("git fetch %s %s:%s" % (PUSH_REMOTE_NAME, target_ref, target_branch_name)) - run_cmd("git checkout %s" % target_branch_name) - - had_conflicts = False - try: - run_cmd(['git', 'merge', pr_branch_name, '--squash']) - except Exception as e: - msg = "Error merging: %s\nWould you like to manually fix-up this merge?" % e - continue_maybe(msg) - msg = "Okay, please fix any conflicts and 'git add' conflicting files... Finished?" - continue_maybe(msg) - had_conflicts = True - - commit_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name, - '--pretty=format:%an <%ae>']).split("\n") - distinct_authors = sorted(set(commit_authors), - key=lambda x: commit_authors.count(x), reverse=True) - primary_author = distinct_authors[0] - commits = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name, - '--pretty=format:%h [%an] %s']).split("\n\n") - - merge_message_flags = [] - - merge_message_flags += ["-m", title] - if body != None: - merge_message_flags += ["-m", body] - - authors = "\n".join(["Author: %s" % a for a in distinct_authors]) - - merge_message_flags += ["-m", authors] - - if had_conflicts: - committer_name = run_cmd("git config --get user.name").strip() - committer_email = run_cmd("git config --get user.email").strip() - message = "This patch had conflicts when merged, resolved by\nCommitter: %s <%s>" % ( - committer_name, committer_email) - merge_message_flags += ["-m", message] - - # The string "Closes #%s" string is required for GitHub to correctly close the PR - merge_message_flags += [ - "-m", - "Closes #%s from %s and squashes the following commits:" % (pr_num, pr_repo_desc)] - for c in commits: - merge_message_flags += ["-m", c] - - run_cmd(['git', 'commit', '--author="%s"' % primary_author] + merge_message_flags) - - continue_maybe("Merge complete (local ref %s). Push to %s?" % ( - target_branch_name, PUSH_REMOTE_NAME)) - - try: - run_cmd('git push %s %s:%s' % (PUSH_REMOTE_NAME, target_branch_name, target_ref)) - except Exception as e: - clean_up() - fail("Exception while pushing: %s" % e) - - merge_hash = run_cmd("git rev-parse %s" % target_branch_name)[:8] - clean_up() - print("Pull request #%s merged!" % pr_num) - print("Merge hash: %s" % merge_hash) - return merge_hash - - -def cherry_pick(pr_num, merge_hash, default_branch): - pick_ref = raw_input("Enter a branch name [%s]: " % default_branch) - if pick_ref == "": - pick_ref = default_branch - - pick_branch_name = "%s_PICK_PR_%s_%s" % (BRANCH_PREFIX, pr_num, pick_ref.upper()) - - run_cmd("git fetch %s %s:%s" % (PUSH_REMOTE_NAME, pick_ref, pick_branch_name)) - run_cmd("git checkout %s" % pick_branch_name) - run_cmd("git cherry-pick -sx %s" % merge_hash) - - continue_maybe("Pick complete (local ref %s). Push to %s?" % ( - pick_branch_name, PUSH_REMOTE_NAME)) - - try: - run_cmd('git push %s %s:%s' % (PUSH_REMOTE_NAME, pick_branch_name, pick_ref)) - except Exception as e: - clean_up() - fail("Exception while pushing: %s" % e) - - pick_hash = run_cmd("git rev-parse %s" % pick_branch_name)[:8] - clean_up() - - print("Pull request #%s picked into %s!" % (pr_num, pick_ref)) - print("Pick hash: %s" % pick_hash) - return pick_ref - - -def fix_version_from_branch(branch, versions): - # Note: Assumes this is a sorted (newest->oldest) list of un-released versions - if branch == "master": - return versions[0] - else: - branch_ver = branch.replace("branch-", "") - return filter(lambda x: x.name.startswith(branch_ver), versions)[-1] - -def exctract_jira_id(title): - m = re.search(r'^(PARQUET-[0-9]+)\b.*$', title, re.IGNORECASE) - if m and m.groups > 0: - return m.group(1).upper() - else: - fail("PR title should be prefixed by a jira id \"PARQUET-XXX: ...\", found: \"%s\"" % title) - -def check_jira(title): - jira_id = exctract_jira_id(title) - asf_jira = jira.client.JIRA({'server': JIRA_API_BASE}, - basic_auth=(JIRA_USERNAME, JIRA_PASSWORD)) - try: - issue = asf_jira.issue(jira_id) - except Exception as e: - fail("ASF JIRA could not find %s\n%s" % (jira_id, e)) - -def resolve_jira(title, merge_branches, comment): - asf_jira = jira.client.JIRA({'server': JIRA_API_BASE}, - basic_auth=(JIRA_USERNAME, JIRA_PASSWORD)) - - default_jira_id = exctract_jira_id(title) - - jira_id = raw_input("Enter a JIRA id [%s]: " % default_jira_id) - if jira_id == "": - jira_id = default_jira_id - - try: - issue = asf_jira.issue(jira_id) - except Exception as e: - fail("ASF JIRA could not find %s\n%s" % (jira_id, e)) - - cur_status = issue.fields.status.name - cur_summary = issue.fields.summary - cur_assignee = issue.fields.assignee - if cur_assignee is None: - cur_assignee = "NOT ASSIGNED!!!" - else: - cur_assignee = cur_assignee.displayName - - if cur_status == "Resolved" or cur_status == "Closed": - fail("JIRA issue %s already has status '%s'" % (jira_id, cur_status)) - print ("=== JIRA %s ===" % jira_id) - print ("summary\t\t%s\nassignee\t%s\nstatus\t\t%s\nurl\t\t%s/%s\n" % ( - cur_summary, cur_assignee, cur_status, JIRA_BASE, jira_id)) - - versions = asf_jira.project_versions("PARQUET") - versions = sorted(versions, key=lambda x: x.name, reverse=True) - versions = filter(lambda x: x.raw['released'] is False, versions) - - default_fix_versions = map(lambda x: fix_version_from_branch(x, versions).name, merge_branches) - for v in default_fix_versions: - # Handles the case where we have forked a release branch but not yet made the release. - # In this case, if the PR is committed to the master branch and the release branch, we - # only consider the release branch to be the fix version. E.g. it is not valid to have - # both 1.1.0 and 1.0.0 as fix versions. - (major, minor, patch) = v.split(".") - if patch == "0": - previous = "%s.%s.%s" % (major, int(minor) - 1, 0) - if previous in default_fix_versions: - default_fix_versions = filter(lambda x: x != v, default_fix_versions) - default_fix_versions = ",".join(default_fix_versions) - - fix_versions = raw_input("Enter comma-separated fix version(s) [%s]: " % default_fix_versions) - if fix_versions == "": - fix_versions = default_fix_versions - fix_versions = fix_versions.replace(" ", "").split(",") - - def get_version_json(version_str): - return filter(lambda v: v.name == version_str, versions)[0].raw - - jira_fix_versions = map(lambda v: get_version_json(v), fix_versions) - - resolve = filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id))[0] - asf_jira.transition_issue( - jira_id, resolve["id"], fixVersions=jira_fix_versions, comment=comment) - - print "Succesfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions) - -if JIRA_IMPORTED: - jira_login_accepted = False - while not jira_login_accepted: - if JIRA_USERNAME: - print "JIRA username: %s" % JIRA_USERNAME - else: - JIRA_USERNAME = raw_input("Enter JIRA username: ") - - if not JIRA_PASSWORD: - JIRA_PASSWORD = getpass.getpass("Enter JIRA password: ") - - try: - asf_jira = jira.client.JIRA({'server': JIRA_API_BASE}, - basic_auth=(JIRA_USERNAME, JIRA_PASSWORD)) - jira_login_accepted = True - except Exception as e: - print "\nJIRA login failed, try again\n" - JIRA_USERNAME = None - JIRA_PASSWORD = None -else: - print "WARNING: Could not find jira python library. Run 'sudo pip install jira' to install." - print "The tool will continue to run but won't handle the JIRA." - print - -branches = get_json("%s/branches" % GITHUB_API_BASE) -branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches]) -# Assumes branch names can be sorted lexicographically -# Julien: I commented this out as we don't have any "branch-*" branch yet -#latest_branch = sorted(branch_names, reverse=True)[0] - -pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ") -pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)) - -url = pr["url"] -title = pr["title"] -if JIRA_IMPORTED: - check_jira(title) -body = pr["body"] -target_ref = pr["base"]["ref"] -user_login = pr["user"]["login"] -base_ref = pr["head"]["ref"] -pr_repo_desc = "%s/%s" % (user_login, base_ref) - -if pr["merged"] is True: - print "Pull request %s has already been merged, assuming you want to backport" % pr_num - merge_commit_desc = run_cmd([ - 'git', 'log', '--merges', '--first-parent', - '--grep=pull request #%s' % pr_num, '--oneline']).split("\n")[0] - if merge_commit_desc == "": - fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num) - - merge_hash = merge_commit_desc[:7] - message = merge_commit_desc[8:] - - print "Found: %s" % message - maybe_cherry_pick(pr_num, merge_hash, latest_branch) - sys.exit(0) - -if not bool(pr["mergeable"]): - msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \ - "Continue? (experts only!)" - continue_maybe(msg) - -print ("\n=== Pull Request #%s ===" % pr_num) -print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % ( - title, pr_repo_desc, target_ref, url)) -continue_maybe("Proceed with merging pull request #%s?" % pr_num) - -merged_refs = [target_ref] - -merge_hash = merge_pr(pr_num, target_ref) - -pick_prompt = "Would you like to pick %s into another branch?" % merge_hash -while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y": - merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)] - -if JIRA_IMPORTED: - continue_maybe("Would you like to update the associated JIRA?") - jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num) - resolve_jira(title, merged_refs, jira_comment) -else: - print "WARNING: Could not find jira python library. Run 'sudo pip install jira' to install." - print "Exiting without trying to close the associated JIRA."