forked from tobami/codespeed
-
-
Notifications
You must be signed in to change notification settings - Fork 19
Expand file tree
/
Copy pathgithub.py
More file actions
177 lines (139 loc) · 5.86 KB
/
github.py
File metadata and controls
177 lines (139 loc) · 5.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# encoding: utf-8
"""
Specialized Git backend which uses Github.com for all of the heavy work
Among other things, this means that the codespeed server doesn't need to have
git installed, the ability to write files, etc.
"""
from __future__ import absolute_import

import json
import logging
import re
from contextlib import closing

try:
    # Python 3
    from urllib.request import urlopen
    from urllib.request import Request
except ImportError:
    # Python 2
    from urllib2 import urlopen
    from urllib2 import Request

import isodate

from django.conf import settings
from django.core.cache import cache

from .exceptions import CommitLogError
logger = logging.getLogger(__name__)
# Matches e.g. https://github.com/tobami/codespeed or .../codespeed.git.
# The project group is lazy ("+?") so the optional ".git" suffix is kept out
# of it; with a greedy group the suffix would be swallowed into the project
# name and the resulting GitHub API URLs would be wrong.  The dot in
# "github.com" is escaped so it matches only a literal dot.
GITHUB_URL_RE = re.compile(
    r'^(?P<proto>\w+)://github\.com/(?P<username>[^/]+)/(?P<project>[^/]+?)(\.git)?$')

# We currently use a simple linear search of on a single parent to retrieve
# the history. This is often good enough, but might miss the actual starting
# point. Thus, we need to terminate the search after a reasonable number of
# revisions.
GITHUB_REVISION_LIMIT = 10
def updaterepo(project, update=True):
    """Do nothing: the GitHub backend keeps no local repository to update."""
    return None
def _is_error_payload(json_obj):
    """Return True when the GitHub response body is a known error message."""
    return ("message" in json_obj
            and json_obj["message"] in ("Not Found", "Server Error"))


def fetch_json(url):
    """Fetch *url* from the GitHub API and return the decoded JSON payload.

    Results go through the Django cache: successes are cached for 30 days
    (SCM data for a given commit shouldn't change), while GitHub error
    payloads are cached for 5 minutes only, to rate-limit retries.

    If GITHUB_OAUTH_TOKEN is configured in settings, it is sent as an
    Authorization header.

    Raises:
        CommitLogError: when GitHub answers with a "Not Found" or
            "Server Error" message payload.
        IOError: when the HTTP request itself fails.
    """
    json_obj = cache.get(url)
    if json_obj is None:
        github_oauth_token = getattr(settings, 'GITHUB_OAUTH_TOKEN', None)
        if github_oauth_token:
            headers = {'Authorization': 'token %s' % (github_oauth_token)}
        else:
            headers = {}
        request = Request(url=url, headers=headers)
        try:
            # closing() releases the HTTP response even if decoding fails;
            # Python 2's urllib2 responses are not context managers, so we
            # can't use `with urlopen(...)` directly.
            with closing(urlopen(request)) as response:
                json_obj = json.load(response)
        except IOError as e:
            logger.exception("Unable to load %s: %s", url, e)
            # Bare raise preserves the original traceback on Python 2:
            raise
        if _is_error_payload(json_obj):
            # We'll still cache these for a brief period of time to avoid
            # making too many requests:
            cache.set(url, json_obj, 300)
        else:
            # We'll cache successes for a very long period of time since
            # SCM diffs shouldn't change:
            cache.set(url, json_obj, 86400 * 30)
    if _is_error_payload(json_obj):
        raise CommitLogError(
            "Unable to load %s: %s" % (url, json_obj["message"]))
    return json_obj
def retrieve_tag(commit_id, username, project):
    """Return the name of the first tag pointing at *commit_id*, or ""."""
    tags_url = 'https://api.github.com/repos/%s/%s/git/refs/tags' % (
        username, project)
    matches = (ref['ref'].split("refs/tags/")[-1]
               for ref in fetch_json(tags_url)
               if ref['object']['sha'] == commit_id)
    return next(matches, "")
def retrieve_revision(commit_id, username, project, revision=None):
    """Fetch commit metadata for *commit_id* from the GitHub API.

    Args:
        commit_id: full SHA of the commit to fetch.
        username, project: GitHub repository owner and name.
        revision: optional Revision model instance; when given, its date,
            author and message are overwritten from GitHub and it is saved.

    Returns:
        A dict with the commit's date, message, author, author_email,
        commitid, short_commit_id, parents and tag.
    """
    commit_url = 'https://api.github.com/repos/%s/%s/git/commits/%s' % (
        username, project, commit_id)
    commit_json = fetch_json(commit_url)

    date = isodate.parse_datetime(commit_json['committer']['date'])
    tag = retrieve_tag(commit_id, username, project)

    if revision:
        # Overwrite any existing data we might have for this revision since
        # we never want our records to be out of sync with the actual VCS:
        if not getattr(settings, 'USE_TZ_AWARE_DATES', False):
            # We need to convert the timezone-aware date to a naive (i.e.
            # timezone-less) date in UTC to avoid killing MySQL:
            logger.debug('USE_TZ_AWARE_DATES setting is set to False, '
                         'converting datetime object to a naive one')
            revision.date = date.astimezone(
                isodate.tzinfo.Utc()).replace(tzinfo=None)
        else:
            # Previously the date was only stored in the naive branch,
            # leaving revision.date stale when USE_TZ_AWARE_DATES is True:
            revision.date = date
        revision.author = commit_json['author']['name']
        revision.message = commit_json['message']
        revision.full_clean()
        revision.save()

    return {'date': date,
            'message': commit_json['message'],
            'body': "",  # TODO: pretty-print diffs
            'author': commit_json['author']['name'],
            'author_email': commit_json['author']['email'],
            'commitid': commit_json['sha'],
            'short_commit_id': commit_json['sha'][0:7],
            'parents': commit_json['parents'],
            'tag': tag}
def _parents_contain(commitid, rev_data):
    """Return True when *commitid* is among rev_data's parent SHAs."""
    return any(parent['sha'] == commitid for parent in rev_data['parents'])


def getlogs(endrev, startrev):
    """Return commit-log dicts between *startrev* and *endrev*, newest first.

    Revisions already known to Codespeed are fetched directly; after that,
    first parents are walked linearly until startrev's commit is found or
    GITHUB_REVISION_LIMIT revisions have been fetched.

    Raises:
        ValueError: when the project's repo_path is not a GitHub URL.
    """
    if endrev != startrev:
        revisions = endrev.branch.revisions.filter(
            date__lte=endrev.date, date__gte=startrev.date)
    else:
        revisions = [i for i in (startrev, endrev) if i.commitid]

    # Work on a local copy of the URL instead of mutating (and never saving)
    # the project model instance as a side effect:
    repo_path = endrev.branch.project.repo_path.rstrip('/')
    m = GITHUB_URL_RE.match(repo_path)
    if not m:
        raise ValueError("Unable to parse Github URL %s" % repo_path)
    username = m.group("username")
    project = m.group("project")

    logs = []
    last_rev_data = None
    revision_count = 0
    ancestor_found = False
    # TODO: get all revisions between endrev and startrev,
    # not only those present in the Codespeed DB
    for revision in revisions:
        last_rev_data = retrieve_revision(
            revision.commitid, username, project, revision)
        logs.append(last_rev_data)
        revision_count += 1
        ancestor_found = _parents_contain(startrev.commitid, last_rev_data)
    # Simple approach to find the startrev, stop after found or after
    # GITHUB_REVISION_LIMIT revisions are fetched.  The None guard avoids a
    # TypeError when `revisions` was empty and nothing was fetched above.
    while (last_rev_data is not None
           and revision_count < GITHUB_REVISION_LIMIT
           and not ancestor_found
           and len(last_rev_data['parents']) > 0):
        last_rev_data = retrieve_revision(
            last_rev_data['parents'][0]['sha'], username, project)
        logs.append(last_rev_data)
        revision_count += 1
        ancestor_found = _parents_contain(startrev.commitid, last_rev_data)
    return sorted(logs, key=lambda i: i['date'], reverse=True)