@@ -293,6 +293,12 @@ def parse_args(args=None):
293293 dest = "include_pull_comments" ,
294294 help = "include pull request review comments in backup" ,
295295 )
296+ parser .add_argument (
297+ "--pull-reviews" ,
298+ action = "store_true" ,
299+ dest = "include_pull_reviews" ,
300+ help = "include pull request reviews in backup" ,
301+ )
296302 parser .add_argument (
297303 "--pull-commits" ,
298304 action = "store_true" ,
@@ -2427,6 +2433,57 @@ def backup_issues(args, repo_cwd, repository, repos_template):
24272433 os .replace (issue_file + ".temp" , issue_file ) # Atomic write
24282434
24292435
# Keys that are only added to a pull request's JSON when the matching backup
# option fetched them; used to carry previously saved values forward when a
# later run does not re-fetch that data.
PULL_OPTIONAL_DATA_KEYS = (
    "comment_regular_data",  # comments fetched from the issues API
    "comment_data",  # review comments fetched from the pulls API
    "commit_data",  # commits belonging to the pull request
    "review_data",  # pull request reviews
)

# Name of the checkpoint file (inside the pulls directory) that stores the
# timestamp of the last successful incremental reviews backup.
PULL_REVIEWS_LAST_UPDATE_FILENAME = "reviews_last_update"
2443+
2444+
def read_json_file_if_exists(path):
    """Load and return the JSON document stored at ``path``.

    Returns ``None`` when ``path`` is not an existing regular file, or when
    the file cannot be read, decoded as UTF-8, or parsed as JSON (the failure
    is logged at debug level rather than raised).
    """
    if os.path.isfile(path):
        try:
            with codecs.open(path, "r", encoding="utf-8") as handle:
                parsed = json.load(handle)
        except (OSError, UnicodeDecodeError, json.decoder.JSONDecodeError) as err:
            # Best effort: an unreadable previous backup is treated as absent.
            logger.debug("Error reading existing JSON file {0}: {1}".format(path, err))
        else:
            return parsed
    return None
2455+
2456+
def restore_existing_pull_optional_data(pull, existing_pull):
    """Copy optional data from a previously saved pull into ``pull``.

    For every key in ``PULL_OPTIONAL_DATA_KEYS`` that ``pull`` lacks but
    ``existing_pull`` has, the saved value is copied over. Keys already
    present in ``pull`` are never overwritten. ``pull`` is mutated in place
    and nothing is returned.

    ``existing_pull`` may be ``None``, empty, or any non-dict value (for
    example a saved file whose top-level JSON is a list or a scalar); in all
    of those cases there is nothing to restore and ``pull`` is left untouched.
    """
    # Only a JSON object can carry the optional keys; membership tests or
    # indexing on other JSON types (bool, number, list) would raise TypeError.
    if not isinstance(existing_pull, dict):
        return

    for key in PULL_OPTIONAL_DATA_KEYS:
        if key not in pull and key in existing_pull:
            pull[key] = existing_pull[key]
2464+
2465+
def get_pull_reviews_since(args, pulls_cwd):
    """Work out the cutoff timestamps for a pull backup that includes reviews.

    Args:
        args: parsed options; ``args.incremental`` is read, and ``args.since``
            is used when present.
        pulls_cwd: directory holding the pull backups and the reviews
            checkpoint file.

    Returns:
        A 3-tuple ``(pulls_since, reviews_since, checkpoint_path)``:

        * ``pulls_since`` -- cutoff to apply when listing pulls, or a falsy
          value for no cutoff.
        * ``reviews_since`` -- the timestamp read from the reviews checkpoint
          file, or ``None`` when no checkpoint was read.
        * ``checkpoint_path`` -- path of the reviews checkpoint file, or
          ``None`` when the run is not incremental.
    """
    args_since = getattr(args, "since", None)
    if not args.incremental:
        return args_since, None, None

    reviews_last_update_path = os.path.join(
        pulls_cwd, PULL_REVIEWS_LAST_UPDATE_FILENAME
    )
    if not os.path.exists(reviews_last_update_path):
        # One-time backfill for existing incremental backups: if the user adds
        # --pull-reviews after a repository checkpoint already exists, the
        # repository-level checkpoint would otherwise skip old PRs forever.
        return None, None, reviews_last_update_path

    # Read through a context manager so the file handle is closed promptly
    # instead of being left to the garbage collector.
    with open(reviews_last_update_path) as checkpoint_file:
        reviews_since = checkpoint_file.read().strip()

    if args_since and reviews_since:
        # Use the earlier of the two cutoffs so neither checkpoint skips data.
        # NOTE(review): this is a plain string comparison; it presumably relies
        # on both values being same-format ISO-8601 timestamps -- confirm.
        return min(args_since, reviews_since), reviews_since, reviews_last_update_path

    return args_since or reviews_since, reviews_since, reviews_last_update_path
2485+
2486+
24302487def backup_pulls (args , repo_cwd , repository , repos_template ):
24312488 has_pulls_dir = os .path .isdir ("{0}/pulls/.git" .format (repo_cwd ))
24322489 if args .skip_existing and has_pulls_dir :
@@ -2436,7 +2493,20 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
24362493 pulls_cwd = os .path .join (repo_cwd , "pulls" )
24372494 mkdir_p (repo_cwd , pulls_cwd )
24382495
2496+ include_pull_reviews = args .include_pull_reviews or args .include_everything
2497+ repository_since = getattr (args , "since" , None )
2498+ pulls_since = repository_since
2499+ pull_reviews_since = None
2500+ pull_reviews_last_update_path = None
2501+ if include_pull_reviews :
2502+ (
2503+ pulls_since ,
2504+ pull_reviews_since ,
2505+ pull_reviews_last_update_path ,
2506+ ) = get_pull_reviews_since (args , pulls_cwd )
2507+
24392508 pulls = {}
2509+ newest_pull_update = None
24402510 _pulls_template = "{0}/{1}/pulls" .format (repos_template , repository ["full_name" ])
24412511 _issue_template = "{0}/{1}/issues" .format (repos_template , repository ["full_name" ])
24422512 query_args = {
@@ -2446,27 +2516,43 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
24462516 "direction" : "desc" ,
24472517 }
24482518
2519+ def track_newest_pull_update (pull ):
2520+ nonlocal newest_pull_update
2521+ updated_at = pull .get ("updated_at" )
2522+ if updated_at and (
2523+ newest_pull_update is None or updated_at > newest_pull_update
2524+ ):
2525+ newest_pull_update = updated_at
2526+
2527+ def pull_is_due_for_repository_checkpoint (pull ):
2528+ return not repository_since or pull ["updated_at" ] >= repository_since
2529+
24492530 if not args .include_pull_details :
24502531 pull_states = ["open" , "closed" ]
24512532 for pull_state in pull_states :
24522533 query_args ["state" ] = pull_state
24532534 _pulls = retrieve_data (args , _pulls_template , query_args = query_args )
24542535 for pull in _pulls :
2455- if args .since and pull ["updated_at" ] < args .since :
2536+ track_newest_pull_update (pull )
2537+ if pulls_since and pull ["updated_at" ] < pulls_since :
24562538 break
2457- if not args . since or pull ["updated_at" ] >= args . since :
2539+ if not pulls_since or pull ["updated_at" ] >= pulls_since :
24582540 pulls [pull ["number" ]] = pull
24592541 else :
24602542 _pulls = retrieve_data (args , _pulls_template , query_args = query_args )
24612543 for pull in _pulls :
2462- if args .since and pull ["updated_at" ] < args .since :
2544+ track_newest_pull_update (pull )
2545+ if pulls_since and pull ["updated_at" ] < pulls_since :
24632546 break
2464- if not args .since or pull ["updated_at" ] >= args .since :
2465- pulls [pull ["number" ]] = retrieve_data (
2466- args ,
2467- _pulls_template + "/{}" .format (pull ["number" ]),
2468- paginated = False ,
2469- )[0 ]
2547+ if not pulls_since or pull ["updated_at" ] >= pulls_since :
2548+ if pull_is_due_for_repository_checkpoint (pull ):
2549+ pulls [pull ["number" ]] = retrieve_data (
2550+ args ,
2551+ _pulls_template + "/{}" .format (pull ["number" ]),
2552+ paginated = False ,
2553+ )[0 ]
2554+ else :
2555+ pulls [pull ["number" ]] = pull
24702556
24712557 logger .info ("Saving {0} pull requests to disk" .format (len (list (pulls .keys ()))))
24722558 # Comments from pulls API are only _review_ comments
@@ -2476,35 +2562,73 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
24762562 comments_regular_template = _issue_template + "/{0}/comments"
24772563 comments_template = _pulls_template + "/{0}/comments"
24782564 commits_template = _pulls_template + "/{0}/commits"
2565+ reviews_template = _pulls_template + "/{0}/reviews"
2566+ pull_review_errors = False
2567+
24792568 for number , pull in list (pulls .items ()):
24802569 pull_file = "{0}/{1}.json" .format (pulls_cwd , number )
2570+ existing_pull = read_json_file_if_exists (pull_file )
2571+ needs_review_backfill = (
2572+ include_pull_reviews
2573+ and (not existing_pull or "review_data" not in existing_pull )
2574+ )
2575+
24812576 if args .incremental_by_files and os .path .isfile (pull_file ):
24822577 modified = os .path .getmtime (pull_file )
24832578 modified = datetime .fromtimestamp (modified ).strftime ("%Y-%m-%dT%H:%M:%SZ" )
2484- if modified > pull ["updated_at" ]:
2579+ if modified > pull ["updated_at" ] and not needs_review_backfill :
24852580 logger .info (
24862581 "Skipping pull request {0} because it wasn't modified since last backup" .format (
24872582 number
24882583 )
24892584 )
24902585 continue
2491- if args .include_pull_comments or args .include_everything :
2586+
2587+ should_fetch_non_review_data = pull_is_due_for_repository_checkpoint (pull )
2588+ if (
2589+ args .include_pull_comments or args .include_everything
2590+ ) and should_fetch_non_review_data :
24922591 template = comments_regular_template .format (number )
24932592 pulls [number ]["comment_regular_data" ] = retrieve_data (args , template )
24942593 template = comments_template .format (number )
24952594 pulls [number ]["comment_data" ] = retrieve_data (args , template )
2496- if args .include_pull_commits or args .include_everything :
2595+ if include_pull_reviews :
2596+ template = reviews_template .format (number )
2597+ try :
2598+ pulls [number ]["review_data" ] = retrieve_data (args , template )
2599+ except Exception as e :
2600+ pull_review_errors = True
2601+ logger .warning (
2602+ "Unable to retrieve reviews for pull request {0}#{1}, skipping reviews: {2}" .format (
2603+ repository ["full_name" ], number , e
2604+ )
2605+ )
2606+ if (
2607+ args .include_pull_commits or args .include_everything
2608+ ) and should_fetch_non_review_data :
24972609 template = commits_template .format (number )
24982610 pulls [number ]["commit_data" ] = retrieve_data (args , template )
24992611 if args .include_attachments :
25002612 download_attachments (
25012613 args , pulls_cwd , pulls [number ], number , repository , item_type = "pull"
25022614 )
25032615
2616+ restore_existing_pull_optional_data (pull , existing_pull )
2617+
25042618 with codecs .open (pull_file + ".temp" , "w" , encoding = "utf-8" ) as f :
25052619 json_dump (pull , f )
25062620 os .replace (pull_file + ".temp" , pull_file ) # Atomic write
25072621
2622+ if (
2623+ include_pull_reviews
2624+ and args .incremental
2625+ and pull_reviews_last_update_path
2626+ and newest_pull_update
2627+ and not pull_review_errors
2628+ and (not pull_reviews_since or newest_pull_update > pull_reviews_since )
2629+ ):
2630+ open (pull_reviews_last_update_path , "w" ).write (newest_pull_update )
2631+
25082632
25092633def backup_milestones (args , repo_cwd , repository , repos_template ):
25102634 milestone_cwd = os .path .join (repo_cwd , "milestones" )
0 commit comments