3
3
#include "run-command.h"
4
4
#include "quote.h"
5
5
#include "sigchain.h"
6
+ #include "streaming.h"
6
7
7
8
/*
8
9
* convert.c - convert a file when checking it out and checking it in.
13
14
* translation when the "text" attribute or "auto_crlf" option is set.
14
15
*/
15
16
16
- /* Stat bits: When BIN is set, the txt bits are unset */
17
17
/*
 * Flags OR-ed into text_stat.stat_bits while content is scanned.
 * Several of them may be set at the same time.
 */
#define CONVERT_STAT_BITS_TXT_LF   (1 << 0)	/* a LF without a preceding CR was seen */
#define CONVERT_STAT_BITS_TXT_CRLF (1 << 1)	/* a CR LF pair was seen */
#define CONVERT_STAT_BITS_BIN      (1 << 2)	/* lone CR, NUL, or too many non-printables */
#define CONVERT_STAT_BITS_ANY_CR   (1 << 3)	/* any CR at all was seen */

/* Size in bytes of the chunk buffer used when content is read piecewise. */
#define STREAM_BUFFER_SIZE (16 * 1024)
20
23
21
24
enum crlf_action {
22
25
CRLF_UNDEFINED ,
@@ -31,30 +34,36 @@ enum crlf_action {
31
34
32
35
/*
 * Tallies accumulated while scanning content for line endings and
 * binary-looking bytes.
 */
struct text_stat {
	/* OR-ed CONVERT_STAT_BITS_* flags describing what was seen */
	unsigned stat_bits;

	/* Line-ending counts: bare CR, bare LF, and CR LF pairs */
	unsigned lonecr;
	unsigned lonelf;
	unsigned crlf;

	/* These are just approximations! */
	unsigned printable;
	unsigned nonprintable;
};
39
42
40
- static void gather_stats (const char * buf , unsigned long size , struct text_stat * stats )
43
+ static void gather_stats_partly (const char * buf , unsigned long size ,
44
+ struct text_stat * stats , unsigned search_only )
41
45
{
42
46
unsigned long i ;
43
47
44
- memset ( stats , 0 , sizeof ( * stats ));
45
-
48
+ if (! buf || ! size )
49
+ return ;
46
50
for (i = 0 ; i < size ; i ++ ) {
47
51
unsigned char c = buf [i ];
48
52
if (c == '\r' ) {
53
+ stats -> stat_bits |= CONVERT_STAT_BITS_ANY_CR ;
49
54
if (i + 1 < size && buf [i + 1 ] == '\n' ) {
50
55
stats -> crlf ++ ;
51
56
i ++ ;
52
- } else
57
+ stats -> stat_bits |= CONVERT_STAT_BITS_TXT_CRLF ;
58
+ } else {
53
59
stats -> lonecr ++ ;
60
+ stats -> stat_bits |= CONVERT_STAT_BITS_BIN ;
61
+ }
54
62
continue ;
55
63
}
56
64
if (c == '\n' ) {
57
65
stats -> lonelf ++ ;
66
+ stats -> stat_bits |= CONVERT_STAT_BITS_TXT_LF ;
58
67
continue ;
59
68
}
60
69
if (c == 127 )
@@ -67,14 +76,16 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
67
76
stats -> printable ++ ;
68
77
break ;
69
78
case 0 :
70
- stats -> nul ++ ;
79
+ stats -> stat_bits |= CONVERT_STAT_BITS_BIN ;
71
80
/* fall through */
72
81
default :
73
82
stats -> nonprintable ++ ;
74
83
}
75
84
}
76
85
else
77
86
stats -> printable ++ ;
87
+ if (stats -> stat_bits & search_only )
88
+ break ; /* We found what we have been searching for */
78
89
}
79
90
80
91
/* If file ends with EOF then don't count this EOF as non-printable. */
@@ -86,41 +97,62 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
86
97
* The same heuristics as diff.c::mmfile_is_binary()
87
98
* We treat files with bare CR as binary
88
99
*/
89
- static int convert_is_binary ( unsigned long size , const struct text_stat * stats )
100
+ static void convert_nonprintable ( struct text_stat * stats )
90
101
{
91
- if (stats -> lonecr )
92
- return 1 ;
93
- if (stats -> nul )
94
- return 1 ;
95
102
if ((stats -> printable >> 7 ) < stats -> nonprintable )
96
- return 1 ;
97
- return 0 ;
103
+ stats -> stat_bits |= CONVERT_STAT_BITS_BIN ;
98
104
}
99
105
100
- static unsigned int gather_convert_stats (const char * data , unsigned long size )
106
+ static void gather_all_stats (const char * buf , unsigned long size ,
107
+ struct text_stat * stats , unsigned search_only )
101
108
{
109
+ memset (stats , 0 , sizeof (* stats ));
110
+ gather_stats_partly (buf , size , stats , search_only );
111
+ convert_nonprintable (stats );
112
+ }
113
+
114
+
115
+ static unsigned get_convert_stats_sha1 (unsigned const char * sha1 ,
116
+ unsigned search_only )
117
+ {
118
+ struct git_istream * st ;
102
119
struct text_stat stats ;
103
- int ret = 0 ;
104
- if (!data || !size )
105
- return 0 ;
106
- gather_stats (data , size , & stats );
107
- if (convert_is_binary (size , & stats ))
108
- ret |= CONVERT_STAT_BITS_BIN ;
109
- if (stats .crlf )
110
- ret |= CONVERT_STAT_BITS_TXT_CRLF ;
111
- if (stats .lonelf )
112
- ret |= CONVERT_STAT_BITS_TXT_LF ;
120
+ enum object_type type ;
121
+ unsigned long sz ;
113
122
114
- return ret ;
123
+ if (!sha1 )
124
+ return 0 ;
125
+ memset (& stats , 0 , sizeof (stats ));
126
+ st = open_istream (sha1 , & type , & sz , NULL );
127
+ if (!st ) {
128
+ return 0 ;
129
+ }
130
+ if (type != OBJ_BLOB )
131
+ goto close_and_exit_i ;
132
+ for (;;) {
133
+ char buf [STREAM_BUFFER_SIZE ];
134
+ ssize_t readlen = read_istream (st , buf , sizeof (buf ));
135
+ if (readlen < 0 )
136
+ break ;
137
+ if (!readlen )
138
+ break ;
139
+ gather_stats_partly (buf , (unsigned long )readlen , & stats , search_only );
140
+ if (stats .stat_bits & search_only )
141
+ break ; /* We found what we have been searching for */
142
+ }
143
+ close_and_exit_i :
144
+ close_istream (st );
145
+ convert_nonprintable (& stats );
146
+ return stats .stat_bits ;
115
147
}
116
148
117
- static const char * gather_convert_stats_ascii ( const char * data , unsigned long size )
149
+ static const char * convert_stats_ascii ( unsigned convert_stats )
118
150
{
119
- unsigned int convert_stats = gather_convert_stats ( data , size );
120
-
151
+ const unsigned eol_bits = CONVERT_STAT_BITS_TXT_LF |
152
+ CONVERT_STAT_BITS_TXT_CRLF ;
121
153
if (convert_stats & CONVERT_STAT_BITS_BIN )
122
154
return "-text" ;
123
- switch (convert_stats ) {
155
+ switch (convert_stats & eol_bits ) {
124
156
case CONVERT_STAT_BITS_TXT_LF :
125
157
return "lf" ;
126
158
case CONVERT_STAT_BITS_TXT_CRLF :
@@ -132,24 +164,45 @@ static const char *gather_convert_stats_ascii(const char *data, unsigned long si
132
164
}
133
165
}
134
166
167
+ static unsigned get_convert_stats_wt (const char * path )
168
+ {
169
+ struct text_stat stats ;
170
+ unsigned search_only = CONVERT_STAT_BITS_BIN ;
171
+ int fd ;
172
+ memset (& stats , 0 , sizeof (stats ));
173
+ fd = open (path , O_RDONLY );
174
+ if (fd < 0 )
175
+ return 0 ;
176
+ for (;;) {
177
+ char buf [STREAM_BUFFER_SIZE ];
178
+ ssize_t readlen = read (fd , buf , sizeof (buf ));
179
+ if (readlen < 0 )
180
+ break ;
181
+ if (!readlen )
182
+ break ;
183
+ gather_stats_partly (buf , (unsigned long )readlen , & stats , search_only );
184
+ if (stats .stat_bits & search_only )
185
+ break ; /* We found what we have been searching for */
186
+ }
187
+ close (fd );
188
+ convert_nonprintable (& stats );
189
+ return stats .stat_bits ;
190
+ }
191
+
135
192
const char * get_cached_convert_stats_ascii (const char * path )
136
193
{
137
- const char * ret ;
138
- unsigned long sz ;
139
- void * data = read_blob_data_from_cache (path , & sz );
140
- ret = gather_convert_stats_ascii (data , sz );
141
- free (data );
142
- return ret ;
194
+ unsigned convert_stats ;
195
+ unsigned search_only = CONVERT_STAT_BITS_BIN ;
196
+ convert_stats = get_convert_stats_sha1 (get_sha1_from_cache (path ),
197
+ search_only );
198
+ return convert_stats_ascii (convert_stats );
143
199
}
144
200
145
201
/*
 * Return the textual form (see convert_stats_ascii()) of the conversion
 * stats gathered by get_convert_stats_wt() for the file at path.
 */
const char *get_wt_convert_stats_ascii(const char *path)
{
	return convert_stats_ascii(get_convert_stats_wt(path));
}
154
207
155
208
static int text_eol_is_crlf (void )
@@ -213,16 +266,10 @@ static void check_safe_crlf(const char *path, enum crlf_action crlf_action,
213
266
214
267
static int has_cr_in_index (const char * path )
215
268
{
216
- unsigned long sz ;
217
- void * data ;
218
- int has_cr ;
219
-
220
- data = read_blob_data_from_cache (path , & sz );
221
- if (!data )
222
- return 0 ;
223
- has_cr = memchr (data , '\r' , sz ) != NULL ;
224
- free (data );
225
- return has_cr ;
269
+ unsigned convert_stats ;
270
+ convert_stats = get_convert_stats_sha1 (get_sha1_from_cache (path ),
271
+ CONVERT_STAT_BITS_ANY_CR );
272
+ return convert_stats & CONVERT_STAT_BITS_ANY_CR ;
226
273
}
227
274
228
275
static int will_convert_lf_to_crlf (size_t len , struct text_stat * stats ,
@@ -234,13 +281,13 @@ static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
234
281
if (!stats -> lonelf )
235
282
return 0 ;
236
283
237
- if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF ) {
284
+ if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_CRLF ) {
238
285
/* If we have any CR or CRLF line endings, we do not touch it */
239
286
/* This is the new safer autocrlf-handling */
240
287
if (stats -> lonecr || stats -> crlf )
241
288
return 0 ;
242
289
243
- if (convert_is_binary ( len , stats ) )
290
+ if (stats -> stat_bits & CONVERT_STAT_BITS_BIN )
244
291
return 0 ;
245
292
}
246
293
return 1 ;
@@ -253,7 +300,8 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
253
300
{
254
301
struct text_stat stats ;
255
302
char * dst ;
256
- int convert_crlf_into_lf ;
303
+ int has_crlf_to_convert ;
304
+ unsigned search_only = 0 ;
257
305
258
306
if (crlf_action == CRLF_BINARY ||
259
307
(src && !len ))
@@ -266,12 +314,16 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
266
314
if (!buf && !src )
267
315
return 1 ;
268
316
269
- gather_stats (src , len , & stats );
317
+ if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF )
318
+ search_only = CONVERT_STAT_BITS_BIN ;
319
+
320
+ gather_all_stats (src , len , & stats , search_only );
321
+
270
322
/* Optimization: No CRLF? Nothing to convert, regardless. */
271
- convert_crlf_into_lf = !!stats .crlf ;
323
+ has_crlf_to_convert = !!stats .crlf ;
272
324
273
325
if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF ) {
274
- if (convert_is_binary ( len , & stats ) )
326
+ if (stats . stat_bits & CONVERT_STAT_BITS_BIN )
275
327
return 0 ;
276
328
/*
277
329
* If the file in the index has any CR in it, do not convert.
@@ -280,24 +332,35 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
280
332
if (checksafe == SAFE_CRLF_RENORMALIZE )
281
333
checksafe = SAFE_CRLF_FALSE ;
282
334
else if (has_cr_in_index (path ))
283
- convert_crlf_into_lf = 0 ;
335
+ has_crlf_to_convert = 0 ;
284
336
}
285
337
if (checksafe && len ) {
286
338
struct text_stat new_stats ;
287
339
memcpy (& new_stats , & stats , sizeof (new_stats ));
288
340
/* simulate "git add" */
289
- if (convert_crlf_into_lf ) {
341
+ if (has_crlf_to_convert ) {
290
342
new_stats .lonelf += new_stats .crlf ;
291
343
new_stats .crlf = 0 ;
344
+ /* all crlf, if any, are gone. Update the bits */
345
+ new_stats .stat_bits = stats .stat_bits & CONVERT_STAT_BITS_BIN ;
346
+ if (new_stats .lonelf )
347
+ new_stats .stat_bits |= CONVERT_STAT_BITS_TXT_LF ;
348
+ if (new_stats .lonecr )
349
+ new_stats .stat_bits |= CONVERT_STAT_BITS_ANY_CR ;
292
350
}
293
351
/* simulate "git checkout" */
294
352
if (will_convert_lf_to_crlf (len , & new_stats , crlf_action )) {
295
353
new_stats .crlf += new_stats .lonelf ;
296
354
new_stats .lonelf = 0 ;
355
+ new_stats .stat_bits = stats .stat_bits & CONVERT_STAT_BITS_BIN ;
356
+ if (new_stats .crlf )
357
+ new_stats .stat_bits |= CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_ANY_CR ;
358
+ if (new_stats .lonecr )
359
+ new_stats .stat_bits |= CONVERT_STAT_BITS_ANY_CR ;
297
360
}
298
361
check_safe_crlf (path , crlf_action , & stats , & new_stats , checksafe );
299
362
}
300
- if (!convert_crlf_into_lf )
363
+ if (!has_crlf_to_convert )
301
364
return 0 ;
302
365
303
366
/*
@@ -338,11 +401,15 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
338
401
{
339
402
char * to_free = NULL ;
340
403
struct text_stat stats ;
404
+ unsigned search_only = 0 ;
341
405
342
406
if (!len || output_eol (crlf_action ) != EOL_CRLF )
343
407
return 0 ;
344
408
345
- gather_stats (src , len , & stats );
409
+ if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_CRLF )
410
+ search_only = CONVERT_STAT_BITS_ANY_CR | CONVERT_STAT_BITS_BIN ;
411
+
412
+ gather_all_stats (src , len , & stats , search_only );
346
413
if (!will_convert_lf_to_crlf (len , & stats , crlf_action ))
347
414
return 0 ;
348
415
0 commit comments