77import re
88import objects .blob as blob
99
10+
class Diffable(object):
    """
    Common interface for all objects that can be diffed against another object
    of compatible type.

    NOTE:
        Subclasses require a ``repo`` member, as is the case for Object
        instances; for practical reasons we do not derive from Object.
    """
    __slots__ = tuple()

    # subclasses provide additional arguments to the git-diff command by
    # supplying them in this tuple
    _diff_args = tuple()

    # Temporary stand-in for the Index type until we have a real index type
    class Index(object):
        pass

    def diff(self, other=None, paths=None, create_patch=False, **kwargs):
        """
        Creates diffs between two items being trees, trees and index or an
        index and the working tree.

        ``other``
            Is the item to compare us with.
            If None, we will be compared to the working tree.
            If Index ( the type itself ), we will be compared against the index.

        ``paths``
            is a list of paths or a single path to limit the diff to.
            A single path is wrapped into a list; the paths are handed to
            git-diff after a ``--`` separator.

        ``create_patch``
            If True, the returned Diff contains a detailed patch that, if
            applied, turns self into other. Patches are somewhat costly as
            blobs have to be read and diffed.

        ``kwargs``
            Additional arguments passed to git-diff, such as
            R=True to swap both sides of the diff.
            ``as_process`` is always forced to True.

        Returns
            git.DiffIndex

        Note
            Rename detection will only work if create_patch is True, as ``-M``
            is only passed together with ``-p``.
        """
        args = list(self._diff_args)
        args.append("--abbrev=40")      # we need full shas
        args.append("--full-index")     # full blob ids on the patch "index" line

        if create_patch:
            args.append("-p")
            args.append("-M")           # check for renames
        else:
            args.append("--raw")

        # normalize a single path into a list
        if paths is not None and not isinstance(paths, (tuple, list)):
            paths = [paths]

        # everything inserted at the front ends up behind ``self``, which is
        # inserted last and therefore becomes the first argument
        if other is not None and other is not self.Index:
            args.insert(0, other)
        if other is self.Index:
            args.insert(0, "--cached")

        args.insert(0, self)

        # paths is a list here or None
        if paths:
            args.append("--")
            args.extend(paths)
        # END paths handling

        kwargs['as_process'] = True
        proc = self.repo.git.diff(*args, **kwargs)

        diff_method = Diff._index_from_raw_format
        if create_patch:
            diff_method = Diff._index_from_patch_format
        index = diff_method(self.repo, proc.stdout)

        # Reap the child once its output has been consumed; otherwise the
        # process handle is simply dropped and never waited on.
        # NOTE(review): assumes the object returned for as_process=True is
        # subprocess.Popen-like (it already has to provide ``stdout``) - confirm.
        proc.wait()
        return index
91+
92+
class DiffIndex(list):
    """
    Implements an Index for diffs, allowing a list of Diffs to be queried by
    the diff properties.

    The class improves the diff handling convenience
    """
    # change type invariant identifying possible ways a blob can have changed
    # A = Added
    # D = Deleted
    # R = Renamed
    # NOTE: 'Modified' mode is implied as it wouldn't be listed as a diff otherwise
    change_type = ("A", "D", "R")

    def iter_change_type(self, change_type):
        """
        Return
            iterator yielding Diff instances that match the given change_type

        ``change_type``
            Member of DiffIndex.change_type

        Raises
            ValueError if change_type is not a member of DiffIndex.change_type.
            The check is done when this method is called, not when the
            returned iterator is advanced for the first time.
        """
        if change_type not in self.change_type:
            # wrap in a tuple so that tuple arguments cannot break the formatting
            raise ValueError("Invalid change type: %s" % (change_type,))
        return self._iter_matching(change_type)

    def _iter_matching(self, change_type):
        """Lazily yield the contained diffs whose flag for change_type is set."""
        for diff in self:
            if change_type == "A" and diff.new_file:
                yield diff
            elif change_type == "D" and diff.deleted_file:
                yield diff
            elif change_type == "R" and diff.renamed:
                yield diff
        # END for each diff
127+
128+
10129class Diff (object ):
11130 """
12- A Diff contains diff information between two commits .
131+ A Diff contains diff information between two Trees .
13132
14133 It contains two sides a and b of the diff, members are prefixed with
15134 "a" and "b" respectively to inidcate that.
@@ -27,7 +146,7 @@ class Diff(object):
27146 ``Deleted File``::
28147
29148 b_mode is None
30- b_blob is NOne
149+ b_blob is None
31150 """
32151
33152 # precompiled regex
@@ -46,7 +165,7 @@ class Diff(object):
46165 """ , re .VERBOSE | re .MULTILINE )
47166 re_is_null_hexsha = re .compile ( r'^0{40}$' )
48167 __slots__ = ("a_blob" , "b_blob" , "a_mode" , "b_mode" , "new_file" , "deleted_file" ,
49- "rename_from" , "rename_to" , "renamed" , " diff" )
168+ "rename_from" , "rename_to" , "diff" )
50169
51170 def __init__ (self , repo , a_path , b_path , a_blob_id , b_blob_id , a_mode ,
52171 b_mode , new_file , deleted_file , rename_from ,
@@ -62,31 +181,45 @@ def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
62181
63182 self .a_mode = a_mode
64183 self .b_mode = b_mode
184+
65185 if self .a_mode :
66186 self .a_mode = blob .Blob ._mode_str_to_int ( self .a_mode )
67187 if self .b_mode :
68188 self .b_mode = blob .Blob ._mode_str_to_int ( self .b_mode )
189+
69190 self .new_file = new_file
70191 self .deleted_file = deleted_file
71- self .rename_from = rename_from
72- self .rename_to = rename_to
73- self .renamed = rename_from != rename_to
192+
193+ # be clear and use None instead of empty strings
194+ self .rename_from = rename_from or None
195+ self .rename_to = rename_to or None
196+
74197 self .diff = diff
75198
@property
def renamed(self):
    """
    Returns:
        True if the rename source and the rename target of this diff differ,
        i.e. the blob has been renamed
    """
    source, target = self.rename_from, self.rename_to
    return source != target
206+
76207 @classmethod
77- def _list_from_string (cls , repo , text ):
208+ def _index_from_patch_format (cls , repo , stream ):
78209 """
79- Create a new diff object from the given text
210+ Create a new DiffIndex from the given text which must be in patch format
80211 ``repo``
81212 is the repository we are operating on - it is required
82213
83- ``text ``
84- result of 'git diff' between two commits or one commit and the index
214+ ``stream ``
215+ result of 'git diff' as a stream (supporting file protocol)
85216
86217 Returns
87- git.Diff[]
218+ git.DiffIndex
88219 """
89- diffs = []
220+ # for now, we have to bake the stream
221+ text = stream .read ()
222+ index = DiffIndex ()
90223
91224 diff_header = cls .re_header .match
92225 for diff in ('\n ' + text ).split ('\n diff --git' )[1 :]:
@@ -97,9 +230,51 @@ def _list_from_string(cls, repo, text):
97230 a_blob_id , b_blob_id , b_mode = header .groups ()
98231 new_file , deleted_file = bool (new_file_mode ), bool (deleted_file_mode )
99232
100- diffs .append (Diff (repo , a_path , b_path , a_blob_id , b_blob_id ,
233+ index .append (Diff (repo , a_path , b_path , a_blob_id , b_blob_id ,
101234 old_mode or deleted_file_mode , new_mode or new_file_mode or b_mode ,
102235 new_file , deleted_file , rename_from , rename_to , diff [header .end ():]))
103236
104- return diffs
237+ return index
238+
@classmethod
def _index_from_raw_format(cls, repo, stream):
    """
    Create a new DiffIndex from the given stream which must be in raw format.

    ``repo``
        is the repository we are operating on; it is handed to every Diff

    ``stream``
        iterable yielding the lines of 'git diff --raw' output; lines not
        starting with ':' are ignored

    NOTE:
        No rename information is read from this format: every Diff is
        created without rename_from / rename_to, hence we only report
        modified, deleted and added files.

    Returns
        git.DiffIndex

    Raises
        ValueError if a line starting with ':' does not carry the five
        metadata fields in front of the path.
    """
    # handles
    # :100644 100644 6870991011cc8d9853a7a8a6f02061512c6a8190 37c5e30c879213e9ae83b21e9d11e55fc20c54b7 M<TAB>.gitignore
    index = DiffIndex()
    for line in stream:
        if not line.startswith(":"):
            continue
        # END its not a valid diff line

        # The metadata fields never contain whitespace, but the path may.
        # git separates the path from the metadata with a tab, so cut there
        # instead of splitting the whole line on whitespace.
        meta, _, path = line[1:].partition("\t")
        old_mode, new_mode, a_blob_id, b_blob_id, change_type = meta.split()
        path = path.rstrip("\r\n")

        a_path = path
        b_path = path
        deleted_file = False
        new_file = False

        # NOTE: We cannot conclude the change type from the existence of a
        # blob, as diffs with the working tree do not have blobs yet
        if change_type == 'D':
            b_path = None
            deleted_file = True
        elif change_type == 'A':
            a_path = None
            new_file = True
        # END add/remove handling

        diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode,
                    new_file, deleted_file, None, None, '')
        index.append(diff)
    # END for each line

    return index
105280
0 commit comments