Skip to content

Commit cbc63d6

Browse files
committed
skeleton of sync_git_clones.py; still a fair amount of work to do
1 parent 661a1fb commit cbc63d6

File tree

2 files changed

+309
-0
lines changed

2 files changed

+309
-0
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ file.
4545
* __scrape_domain.py__ - Python script using requests and BeautifulSoup4 to request all URLs/links/images/CSS/feeds/etc. found on a domain.
4646
* __show_dhcp_fixed_ACKs.pl__ - script to show the most recent DHCP ACKs per IP address for ISC DHCPd, from a log file. Originally written for Vyatta routers that just show the dynamic leases
4747
* __simpleLCDproc.py__ - Simple LCDproc replacement in Python. Uses LCDd server.
48+
* __sync_git_clones.py__ - A script to keep your git clones (in a specified list of directories) in sync
49+
with origin and optionally upstream, and optionally to keep origin's master
50+
branch in sync with upstream.
4851
* __syslogAgeChecker.php__ - script to check timestamp of last syslog line in some files, and send mail if >= X seconds
4952
* __syslogDatesGraph.php__ - script to help visualize time distribution of syslog messages. This is the graph host part.
5053
* __syslogDatesToArray.php__ - script to help visualize time distribution of syslog messages. This is the log host part.

sync_git_clones.py

Lines changed: 306 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,306 @@
1+
#!/usr/bin/env python
2+
"""
3+
sync_git_clones.py
4+
-------------------
5+
6+
A script to keep your git clones (in a specified list of directories) in sync
7+
with origin and optionally upstream, and optionally to keep origin's master
8+
branch in sync with upstream.
9+
10+
Main Features
11+
=============
12+
13+
* Fail/exit if one of a list of shell commands fail (use to ensure that ssh-agent
14+
must be running, VPN connection must be up, etc.)
15+
* Operate on all git repos in specified directories (REPO_DIRS) non-recursively
16+
* Fetch origin for each git repo found
17+
* Optionally switch to master branch and pull (controlled globally via ENABLE_PULL
18+
and per-repo via REPO_OPTIONS)
19+
* If using github API (see below):
20+
* Add fetch refs to fetch PRs as branches.
21+
* If the repo is a fork, add a remote for the upstream (parent). Optionally, pull
22+
master on the upstream and push back to origin (keep origin master in sync with
23+
upstream).
24+
25+
Warnings / ToDo
26+
===============
27+
28+
* GitPython 0.3.2 fails with a TypeError (Cannot handle reference type: "'refs/pull/1/head'")
29+
on any GitHub repos that are setup to check out PRs as branches (i.e. as described
30+
in: https://help.github.com/articles/checking-out-pull-requests-locally). I'm working on
31+
a PR to fix this, but until then... caveat emptor.
32+
33+
Requirements
34+
============
35+
36+
Unfortunately this only works with python2, as GitPython (its gitdb package) does
37+
not yet support python3. All efforts have been made to keep everything within this
38+
script ready for python3 once GitPython chooses to support it.
39+
40+
No, this isn't a real Python package. You should run it from a virtualenv with these
41+
requirements (feel free to ``pip install`` them as seen here):
42+
43+
* GitPython==0.3.2.RC1
44+
* github3.py>=0.8.2 (if using GitHub integration; tested with 0.8.2)
45+
46+
Configuration
47+
=============
48+
49+
Configuration is stored as JSON, in a text configuration file at
50+
``~/.sync_git_clones.conf.py`` by default. Running this script without an existing
51+
configuration file and with the ``-g`` option will cause it to write a sample config
52+
file to disk, for you to edit.
53+
54+
The configuration file supports the following keys:
55+
* __gitdirs__ - (list of strings) a list of directories to search _non_-recursively for
56+
git directories/clones. These will be passed through os.path.expanduser and
57+
os.path.abspath before being used.
58+
* __skipdirty__ - (boolean) If true, skip past dirty repos and log an error.
59+
* __only_fetch_origin__ - (boolean) If true, only fetch a remote called "origin".
60+
Otherwise, fetch all remotes.
61+
* __github__ - (boolean) whether to enable GitHub API integration.
62+
63+
If you want to use the GitHub API integration, you should have an API key/token available.
64+
This script will parse ~/.gitconfig using the ConfigParser module, looking for github.token
65+
as explained in the [Local GitHub Config blog post](https://github.com/blog/180-local-github-config).
66+
67+
Changelog
68+
=========
69+
2014-04-26 jantman (Jason Antman) <[email protected]>
70+
- initial version
71+
72+
"""
73+
74+
import optparse
75+
import sys
76+
import logging
77+
import os.path
78+
import json
79+
import git
80+
81+
# prefer the pip vendored pkg_resources
82+
try:
83+
from pip._vendor import pkg_resources
84+
except ImportError:
85+
import pkg_resources
86+
87+
# Root logging is configured at WARNING; the __main__ block may lower the
# level of this module's logger depending on how many -v flags were given.
logging.basicConfig(
    level=logging.WARNING,
    format=(
        "[%(levelname)s %(filename)s:%(lineno)s - "
        "%(funcName)s() ] %(message)s"
    ),
)
logger = logging.getLogger(__name__)
89+
90+
def fetch_remote(rmt, dryrun=False):
    """
    Fetch one git remote, or only log the intent when doing a dry run.

    :param rmt: the remote to fetch; must expose ``name`` and ``fetch()``
      (presumably a GitPython Remote, as yielded by ``repo.remotes``)
    :param dryrun: if true, do not fetch; log what would have been fetched
    :type dryrun: boolean
    :returns: always True
    :rtype: boolean
    """
    if not dryrun:
        # progress goes to stdout rather than the logger, so it is shown
        # regardless of the configured log level
        print("fetching remote %s" % rmt.name)
        rmt.fetch()
        return True
    logger.info("DRYRUN - would fetch rmt %s" % rmt.name)
    return True
98+
99+
def do_git_dir(path, config, gh_client=None, dryrun=False):
    """
    Operate on a single git directory/clone.

    Bare repos are skipped. Dirty repos are skipped when
    ``config['skipdirty']`` is true; handling them otherwise is not yet
    implemented and exits the program. If any remote URL contains
    'github.com', the repo is handed to do_github_repo() first. Afterwards
    every remote is fetched, or only the one named "origin" when
    ``config['only_fetch_origin']`` is true.

    :param path: path to the clone
    :type path: string
    :param config: config dict; 'skipdirty' and 'only_fetch_origin' are read
    :type config: dict
    :param gh_client: a GitHub API client object (TODO), passed through to
      do_github_repo()
    :type gh_client: TODO
    :param dryrun: if true, do not change anything; log actions that would be taken
    :type dryrun: boolean
    :returns: False if the repo was skipped (bare or dirty), True otherwise
    :rtype: boolean
    :raises SystemExit: if the repo is dirty and 'skipdirty' is false
    """
    logger.info("doing gitdir %s" % path)
    repo = git.Repo(path)
    if repo.bare:
        logger.warning("Skipping bare repo: %s" % path)
        return False
    if repo.is_dirty():
        if config['skipdirty']:
            logger.error("Skipping dirty repo: %s" % path)
            return False
        raise SystemExit("TODO: implement what to do with dirty repos")

    # ok, repo isn't bare or dirty
    current_branch = repo.active_branch
    logger.debug("current branch is %s" % current_branch)

    on_github = any('github.com' in rmt.url for rmt in repo.remotes)
    if on_github:
        # TODO - guard this with a config setting?
        # honour the caller's dryrun flag so a dry run never modifies the repo
        do_github_repo(repo, config, gh_client, dryrun=dryrun)

    for rmt in repo.remotes:
        if rmt.name != 'origin' and config['only_fetch_origin']:
            logger.debug("skipping remote %s - only_fetch_origin" % rmt.name)
            continue
        fetch_remote(rmt, dryrun=dryrun)

    # guard with config setting TODO
    # if branch is not master, switch to master; pull; switch back to original branch

    return True
147+
148+
def do_github_repo(repo, config, gh_client, dryrun=False):
    """
    Placeholder for the GitHub-specific handling of a single clone.

    Nothing is implemented yet: calling this function always terminates the
    program via SystemExit, and none of the arguments are used.

    :param repo: a GitPython Repository object, passed in from do_git_dir
    :type repo: Repository
    :param config: config dict
    :type config: dict
    :param gh_client: TODO
    :param dryrun: if true, do not change anything; log actions that would be taken
    :type dryrun: boolean
    :raises SystemExit: always
    """
    message = "Do GitHub stuff here"
    raise SystemExit(message)
160+
161+
def get_github_client(config, dryrun=False):
    """
    Return a GitHub API client instance - not implemented yet.

    The intended behavior is to read the API key from git config and return
    a <TODO> github client; for now the arguments are ignored and None is
    always returned.

    :param config: config dict (currently unused)
    :type config: dict
    :param dryrun: currently unused
    :type dryrun: boolean
    :returns: None
    """
    # TODO: `git config --global github.token` and trim that, make sure
    # it's 40 characters
    # TODO: try to instantiate API client, and connect
    # TODO: return client object
    client = None
    return client
168+
169+
def main(configpath='~/.sync_git_clones.conf.py', dryrun=False, genconfig=False):
    """
    Main entry point: load the configuration and process every git clone
    found in the configured directories.

    :param configpath: path to configuration file; passed through
      os.path.expanduser and os.path.abspath before use
    :type configpath: string
    :param dryrun: if true, do not change anything; log actions that would be taken
    :type dryrun: boolean
    :param genconfig: if config file does not exist, write a sample one and exit
    :type genconfig: boolean
    :raises SystemExit: if the configuration file does not exist (after
      writing a sample one when genconfig is true)
    """
    logger.debug("main called with config=%s" % configpath)
    if dryrun:
        logger.warning("dryrun=True - no changes will actually be made")
    configpath = os.path.abspath(os.path.expanduser(configpath))
    logger.debug("config expanded to '%s'" % configpath)

    if not os.path.exists(configpath):
        logger.debug("config file does not exist")
        if not genconfig:
            # the option name must match the one registered in parse_args()
            raise SystemExit("ERROR: configuration file does not exist. Run with -g|--gen-config to write a sample config at %s" % configpath)
        logger.debug("generating sample config file")
        generate_config(configpath, dryrun=dryrun)
        raise SystemExit("Sample configuration file written to: %s" % configpath)

    # attempt to read JSON config
    config = load_config(configpath)
    logger.debug("config loaded")

    if config['github']:
        gh_client = get_github_client(config, dryrun=dryrun)
    else:
        gh_client = None
        logger.info("github integration disabled by config")

    git_dirs = get_git_dirs(config)
    logger.info("found %d git directories" % len(git_dirs))
    for d in git_dirs:
        do_git_dir(d, config, gh_client=gh_client, dryrun=dryrun)
209+
210+
def get_git_dirs(config):
    """
    Get a list of all git directories to examine.

    Each entry of ``config['gitdirs']`` is expanded with os.path.expanduser
    and os.path.abspath, then searched non-recursively for subdirectories
    that contain a ``.git`` directory. Configured entries that are not
    existing directories are skipped with a warning instead of aborting the
    whole run.

    :param config: config dict; only the 'gitdirs' key is read
    :type config: dict
    :returns: absolute paths of the clones found, without duplicates, in
      configuration order and sorted by name within each directory
    :rtype: list of strings
    """
    logger.debug("finding git directories")
    gitdirs = []
    seen = set()
    for d in config['gitdirs']:
        d = os.path.abspath(os.path.expanduser(d))
        logger.debug("checking %s" % d)
        if not os.path.isdir(d):
            logger.warning("skipping %s - not a directory" % d)
            continue
        # sorted so that repos are processed in a reproducible order
        for name in sorted(os.listdir(d)):
            path = os.path.join(d, name)
            # path/.git being a directory implies path itself is a directory
            if not os.path.isdir(os.path.join(path, '.git')):
                continue
            if path in seen:
                logger.debug("found git dir but already in list: %s" % path)
                continue
            logger.debug("found git dir: %s" % path)
            seen.add(path)
            gitdirs.append(path)
    return gitdirs
226+
227+
def check_versions():
    """
    Verify that the installed GitPython satisfies the version we need.

    Per the original author's note, this script relies on features of the
    heavily-rewritten 0.3.2RC1 release of GitPython (marked beta / RC),
    while a plain ``pip install GitPython`` yielded the unusable 0.1.7 at
    the time of writing.

    thanks to @qwcode for this simple logic

    :returns: True when the requirement is satisfied
    :rtype: boolean
    :raises SystemExit: if the installed GitPython does not satisfy the
      requirement
    """
    requirement_text = 'GitPython>=0.3.2.RC1'
    requirement = pkg_resources.Requirement.parse(requirement_text)
    installed = pkg_resources.get_distribution('GitPython')
    logger.debug("Checking GitPython requirement")
    if installed in requirement:
        logger.debug("All requirements satisfied")
        return True
    raise SystemExit("ERROR: sync_git_clones.py requires %s" % requirement_text)
246+
247+
def load_config(configpath):
    """
    Load the JSON configuration file at configpath and fill in defaults.

    Keys missing from the file receive these defaults: ``skipdirty`` True,
    ``only_fetch_origin`` False and ``github`` False (so GitHub integration
    stays off unless explicitly enabled, rather than failing with a KeyError
    when main() reads the key). ``gitdirs`` has no default.

    :param configpath: path to the configuration file
    :type configpath: string
    :returns: the parsed configuration with defaults applied
    :rtype: dict
    :raises ValueError: if the file does not contain valid JSON
    """
    logger.debug("loading config from %s" % configpath)
    with open(configpath, 'r') as fh:
        config = json.load(fh)

    # apply defaults
    defaults = {'skipdirty': True, 'only_fetch_origin': False, 'github': False}
    for key in sorted(defaults):
        if key not in config:
            logger.debug("applying default config value for %s" % (key))
            config[key] = defaults[key]
    return config
261+
262+
def generate_config(configpath, dryrun=False):
    """
    Write out a sample config file.

    The sample is serialized as indented JSON with sorted keys. In dry-run
    mode nothing is written; the would-be content is logged instead.

    :param configpath: path the sample configuration is written to
    :type configpath: string
    :param dryrun: if true, do not write the file; log what would be written
    :type dryrun: boolean
    :returns: always True
    :rtype: boolean
    """
    config = {
        'gitdirs': ['~/GIT', '/path/to/dir'],
        'skipdirty': True,
        'only_fetch_origin': False,
        'github': True,
    }
    logger.debug("serializing sample config")
    configstr = json.dumps(config, sort_keys=True, indent=4, separators=(',', ': '))
    logger.debug("writing serialized sample config to %s" % configpath)
    if dryrun:
        logger.info("DRYRUN: would have written to %s: \n%s" % (configpath, configstr))
        return True
    with open(configpath, 'w') as fh:
        fh.write(configstr)
    logger.debug("sample config written")
    return True
278+
279+
def parse_args(argv):
    """
    Parse command line arguments with OptionParser.

    :param argv: the argument list to parse; the caller passes the full
      ``sys.argv``, so the program name ends up among the positional
      arguments, which are ignored
    :type argv: list of strings
    :returns: the parsed options, with attributes ``config``, ``test``,
      ``verbose`` (an integer count, 0 when -v was not given) and ``genconfig``
    :rtype: optparse.Values
    """
    parser = optparse.OptionParser()

    parser.add_option('-c', '--config', dest='config', action='store', type='string',
                      default='~/.sync_git_clones.conf.py',
                      help='JSON config file location (default: ~/.sync_git_clones.conf.py)')

    parser.add_option('-t', '--test', dest='test', action='store_true', default=False,
                      help='test / dry-run - do not take any action, print what would be done')

    # default=0 so the count is always an integer; without it the value is
    # None when -v is absent, and comparing None with an int fails on Python 3
    parser.add_option('-v', '--verbose', dest='verbose', action='count', default=0,
                      help='verbose output on what actions are being taken. Specify twice for debug-level output.')

    parser.add_option('-g', '--gen-config', dest='genconfig', action='store_true', default=False,
                      help='if config file does not exist, generate a sample one and exit')

    options, args = parser.parse_args(argv)
    return options
298+
299+
if __name__ == "__main__":
    opts = parse_args(sys.argv)
    # an optparse 'count' option without a default is None when the flag is
    # never given; normalise to an int so the comparisons below also work on
    # Python 3, where None cannot be ordered against an integer
    verbosity = opts.verbose or 0
    if verbosity > 1:
        logger.setLevel(logging.DEBUG)
    elif verbosity == 1:
        logger.setLevel(logging.INFO)
    check_versions()
    main(configpath=opts.config, dryrun=opts.test, genconfig=opts.genconfig)

0 commit comments

Comments
 (0)