forked from cedricrupb/code_diff
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdiff_utils.py
More file actions
145 lines (90 loc) · 3.75 KB
/
Copy pathdiff_utils.py
File metadata and controls
145 lines (90 loc) · 3.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import re
# Diff parsing -----------------------------------------------------------------
class Hunk:
    """One hunk of a diff: its raw lines plus the indices of changed lines.

    Attributes:
        lines: The hunk's lines as given (each is expected to carry a
            one-character marker in its first column).
        added_lines: Set of indices into ``lines`` marked as additions.
        rm_lines: Set of indices into ``lines`` marked as removals.
        header: Optional header text for the hunk, or ``None``.
    """

    def __init__(self, lines, added_lines, rm_lines, header=None):
        self.lines = lines
        self.added_lines = set(added_lines)
        self.rm_lines = set(rm_lines)
        self.header = header

    def _render(self, dropped, unmarked):
        """Join the hunk lines, skipping ``dropped`` indices.

        Lines whose index is in ``unmarked`` get their first character
        replaced by a single space; all other lines are kept verbatim.
        """
        pieces = []
        for index, text in enumerate(self.lines):
            if index in dropped:
                continue
            pieces.append(" " + text[1:] if index in unmarked else text)
        return "".join(pieces)

    @property
    def after(self):
        """Text of the hunk once applied: removals dropped, additions kept."""
        return self._render(self.rm_lines, self.added_lines)

    @property
    def before(self):
        """Text of the hunk before the change: additions dropped, removals kept."""
        return self._render(self.added_lines, self.rm_lines)

    def __repr__(self):
        body = "".join(self.lines)
        # A missing (or empty) header is simply omitted from the output.
        return self.header + body if self.header else body
def _parse_hunk(lines, start, end):
    """Build a ``Hunk`` from ``lines[start]`` (header) and the body after it.

    Args:
        lines: All lines of the diff.
        start: Index of the hunk header line.
        end: Exclusive end index of the hunk body.

    Returns:
        A ``Hunk`` whose added/removed indices are relative to the body,
        i.e. index 0 is the first line after the header.
    """
    body = lines[start + 1:end]
    # The first-column marker decides whether a line is an addition or a removal.
    additions = [ix for ix, text in enumerate(body) if text.startswith("+")]
    removals = [ix for ix, text in enumerate(body) if text.startswith("-")]
    return Hunk(body, additions, removals, header=lines[start])
# Matches a unified-diff hunk header such as "@@ -12,7 +12,9 @@ def foo():".
# Groups: 1 = old start line, 2 = optional ",count" of the old side,
#         3 = new start line, 4 = optional ",count" of the new side.
# Written as a raw string so that "\d" and "\+" reach the regex engine as
# regex escapes instead of being invalid Python string escapes.
hunk_pat = re.compile(r"@@ -(\d+)(,\d+)? \+(\d+)(,\d+)? @@.*")
def parse_hunks(diff):
    """Split a diff string into its hunks.

    A hunk starts at every line matching ``hunk_pat`` and runs up to (but
    not including) the next such line, or to the end of the input for the
    last hunk. Lines preceding the first hunk header are ignored. Any
    non-header line that follows a hunk is attributed to that hunk.

    Args:
        diff: The diff text.

    Returns:
        A list of ``Hunk`` objects in order of appearance. A header that is
        immediately followed by another header yields no hunk.
    """
    lines = diff.splitlines(True)  # keep line endings so hunks re-join verbatim

    hunks = []
    start_ix = -1  # index of the header of the hunk being collected; -1 = none yet

    for line_ix, line in enumerate(lines):
        if not hunk_pat.match(line):
            continue
        # A new header closes the previous hunk. `_parse_hunk` slices with an
        # exclusive end, so the end must be this header's own index; passing
        # `line_ix - 1` would silently drop the hunk's last body line.
        if 0 <= start_ix < line_ix - 1:
            hunks.append(_parse_hunk(lines, start_ix, line_ix))
        start_ix = line_ix

    # The last hunk extends to the end of the input.
    if start_ix >= 0:
        hunks.append(_parse_hunk(lines, start_ix, len(lines)))

    return hunks
# Diff cleaning --------------------------------
def _has_incomplete_comment(lines):
is_incomplete2 = False
is_incomplete1 = False
for line in lines:
count2 = line.count("\"\"\"")
if count2 % 2 == 1: is_incomplete2 = not is_incomplete2
count1 = line.count("\'\'\'")
if count1 % 2 == 1: is_incomplete1 = not is_incomplete1
return is_incomplete1 or is_incomplete2
def _determine_incomplete_comment(lines):
last_incomplete2 = -1
last_incomplete1 = -1
for i, line in enumerate(lines):
count2 = line.count("\"\"\"")
if count2 % 2 == 1:
last_incomplete2 = i if last_incomplete2 == -1 else -1
count1 = line.count("\'\'\'")
if count1 % 2 == 1:
last_incomplete1 = i if last_incomplete1 == -1 else -1
assert last_incomplete1 != -1 or last_incomplete2 != -1
last_incomplete = last_incomplete2 if last_incomplete2 != -1 else last_incomplete1
dist_to_end = len(lines) - last_incomplete
if last_incomplete < dist_to_end:
return last_incomplete + 1, len(lines)
else:
return 0, last_incomplete
def clean_hunk(hunk):
    """Trim a hunk so that it no longer contains a dangling triple quote.

    A hunk whose lines have balanced triple-quote delimiters is returned
    as is. Otherwise a new ``Hunk`` is built from the slice chosen by
    ``_determine_incomplete_comment``, with added/removed indices shifted
    to the new start. The header is carried over unchanged.
    """
    if _has_incomplete_comment(hunk.lines):
        start, end = _determine_incomplete_comment(hunk.lines)
        kept_lines = hunk.lines[start:end]

        def rebase(indices):
            # Keep only indices inside the slice and make them slice-relative.
            return [ix - start for ix in indices if start <= ix < end]

        return Hunk(
            kept_lines,
            rebase(hunk.added_lines),
            rebase(hunk.rm_lines),
            header=hunk.header,
        )

    return hunk