4
4
#include "quote.h"
5
5
#include "sigchain.h"
6
6
#include "pkt-line.h"
7
+ #include "streaming.h"
7
8
8
9
/*
9
10
* convert.c - convert a file when checking it out and checking it in.
14
15
* translation when the "text" attribute or "auto_crlf" option is set.
15
16
*/
16
17
17
- /* Stat bits: When BIN is set, the txt bits are unset */
18
18
#define CONVERT_STAT_BITS_TXT_LF 0x1
19
19
#define CONVERT_STAT_BITS_TXT_CRLF 0x2
20
20
#define CONVERT_STAT_BITS_BIN 0x4
21
+ #define CONVERT_STAT_BITS_ANY_CR 0x8
22
+
23
+ #define STREAM_BUFFER_SIZE (1024*16)
21
24
22
25
enum crlf_action {
23
26
CRLF_UNDEFINED ,
@@ -32,30 +35,36 @@ enum crlf_action {
32
35
33
36
struct text_stat {
	/*
	 * Bit mask of CONVERT_STAT_BITS_* flags describing what has been
	 * seen in the scanned data (LF, CRLF, any CR, binary).
	 */
	unsigned stat_bits;

	/* Counts of lone CR, lone LF and CRLF pairs */
	unsigned lonecr, lonelf, crlf;

	/* These are just approximations! */
	unsigned printable, nonprintable;
};
40
43
41
- static void gather_stats (const char * buf , unsigned long size , struct text_stat * stats )
44
+ static void gather_stats_partly (const char * buf , unsigned long size ,
45
+ struct text_stat * stats , unsigned search_only )
42
46
{
43
47
unsigned long i ;
44
48
45
- memset ( stats , 0 , sizeof ( * stats ));
46
-
49
+ if (! buf || ! size )
50
+ return ;
47
51
for (i = 0 ; i < size ; i ++ ) {
48
52
unsigned char c = buf [i ];
49
53
if (c == '\r' ) {
54
+ stats -> stat_bits |= CONVERT_STAT_BITS_ANY_CR ;
50
55
if (i + 1 < size && buf [i + 1 ] == '\n' ) {
51
56
stats -> crlf ++ ;
52
57
i ++ ;
53
- } else
58
+ stats -> stat_bits |= CONVERT_STAT_BITS_TXT_CRLF ;
59
+ } else {
54
60
stats -> lonecr ++ ;
61
+ stats -> stat_bits |= CONVERT_STAT_BITS_BIN ;
62
+ }
55
63
continue ;
56
64
}
57
65
if (c == '\n' ) {
58
66
stats -> lonelf ++ ;
67
+ stats -> stat_bits |= CONVERT_STAT_BITS_TXT_LF ;
59
68
continue ;
60
69
}
61
70
if (c == 127 )
@@ -68,14 +77,16 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
68
77
stats -> printable ++ ;
69
78
break ;
70
79
case 0 :
71
- stats -> nul ++ ;
80
+ stats -> stat_bits |= CONVERT_STAT_BITS_BIN ;
72
81
/* fall through */
73
82
default :
74
83
stats -> nonprintable ++ ;
75
84
}
76
85
}
77
86
else
78
87
stats -> printable ++ ;
88
+ if (stats -> stat_bits & search_only )
89
+ break ; /* We found what we have been searching for */
79
90
}
80
91
81
92
/* If file ends with EOF then don't count this EOF as non-printable. */
@@ -87,41 +98,62 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
87
98
* The same heuristics as diff.c::mmfile_is_binary()
88
99
* We treat files with bare CR as binary
89
100
*/
90
- static int convert_is_binary ( unsigned long size , const struct text_stat * stats )
101
+ static void convert_nonprintable ( struct text_stat * stats )
91
102
{
92
- if (stats -> lonecr )
93
- return 1 ;
94
- if (stats -> nul )
95
- return 1 ;
96
103
if ((stats -> printable >> 7 ) < stats -> nonprintable )
97
- return 1 ;
98
- return 0 ;
104
+ stats -> stat_bits |= CONVERT_STAT_BITS_BIN ;
105
+ }
106
+
107
+ static void gather_all_stats (const char * buf , unsigned long size ,
108
+ struct text_stat * stats , unsigned search_only )
109
+ {
110
+ memset (stats , 0 , sizeof (* stats ));
111
+ gather_stats_partly (buf , size , stats , search_only );
112
+ convert_nonprintable (stats );
99
113
}
100
114
101
- static unsigned int gather_convert_stats (const char * data , unsigned long size )
115
+
116
+ static unsigned get_convert_stats_sha1 (unsigned const char * sha1 ,
117
+ unsigned search_only )
102
118
{
119
+ struct git_istream * st ;
103
120
struct text_stat stats ;
104
- int ret = 0 ;
105
- if (!data || !size )
121
+ enum object_type type ;
122
+ unsigned long sz ;
123
+
124
+ if (!sha1 )
125
+ return 0 ;
126
+ memset (& stats , 0 , sizeof (stats ));
127
+ st = open_istream (sha1 , & type , & sz , NULL );
128
+ if (!st ) {
106
129
return 0 ;
107
- gather_stats (data , size , & stats );
108
- if (convert_is_binary (size , & stats ))
109
- ret |= CONVERT_STAT_BITS_BIN ;
110
- if (stats .crlf )
111
- ret |= CONVERT_STAT_BITS_TXT_CRLF ;
112
- if (stats .lonelf )
113
- ret |= CONVERT_STAT_BITS_TXT_LF ;
114
-
115
- return ret ;
130
+ }
131
+ if (type != OBJ_BLOB )
132
+ goto close_and_exit_i ;
133
+ for (;;) {
134
+ char buf [STREAM_BUFFER_SIZE ];
135
+ ssize_t readlen = read_istream (st , buf , sizeof (buf ));
136
+ if (readlen < 0 )
137
+ break ;
138
+ if (!readlen )
139
+ break ;
140
+ gather_stats_partly (buf , (unsigned long )readlen , & stats , search_only );
141
+ if (stats .stat_bits & search_only )
142
+ break ; /* We found what we have been searching for */
143
+ }
144
+ close_and_exit_i :
145
+ close_istream (st );
146
+ convert_nonprintable (& stats );
147
+ return stats .stat_bits ;
116
148
}
117
149
118
- static const char * gather_convert_stats_ascii ( const char * data , unsigned long size )
150
+ static const char * convert_stats_ascii ( unsigned convert_stats )
119
151
{
120
- unsigned int convert_stats = gather_convert_stats ( data , size );
121
-
152
+ const unsigned eol_bits = CONVERT_STAT_BITS_TXT_LF |
153
+ CONVERT_STAT_BITS_TXT_CRLF ;
122
154
if (convert_stats & CONVERT_STAT_BITS_BIN )
123
155
return "-text" ;
124
- switch (convert_stats ) {
156
+ switch (convert_stats & eol_bits ) {
125
157
case CONVERT_STAT_BITS_TXT_LF :
126
158
return "lf" ;
127
159
case CONVERT_STAT_BITS_TXT_CRLF :
@@ -133,24 +165,45 @@ static const char *gather_convert_stats_ascii(const char *data, unsigned long si
133
165
}
134
166
}
135
167
168
+ static unsigned get_convert_stats_wt (const char * path )
169
+ {
170
+ struct text_stat stats ;
171
+ unsigned search_only = CONVERT_STAT_BITS_BIN ;
172
+ int fd ;
173
+ memset (& stats , 0 , sizeof (stats ));
174
+ fd = open (path , O_RDONLY );
175
+ if (fd < 0 )
176
+ return 0 ;
177
+ for (;;) {
178
+ char buf [STREAM_BUFFER_SIZE ];
179
+ ssize_t readlen = read (fd , buf , sizeof (buf ));
180
+ if (readlen < 0 )
181
+ break ;
182
+ if (!readlen )
183
+ break ;
184
+ gather_stats_partly (buf , (unsigned long )readlen , & stats , search_only );
185
+ if (stats .stat_bits & search_only )
186
+ break ; /* We found what we have been searching for */
187
+ }
188
+ close (fd );
189
+ convert_nonprintable (& stats );
190
+ return stats .stat_bits ;
191
+ }
192
+
136
193
const char * get_cached_convert_stats_ascii (const char * path )
137
194
{
138
- const char * ret ;
139
- unsigned long sz ;
140
- void * data = read_blob_data_from_cache (path , & sz );
141
- ret = gather_convert_stats_ascii (data , sz );
142
- free (data );
143
- return ret ;
195
+ unsigned convert_stats ;
196
+ unsigned search_only = CONVERT_STAT_BITS_BIN ;
197
+ convert_stats = get_convert_stats_sha1 (get_sha1_from_cache (path ),
198
+ search_only );
199
+ return convert_stats_ascii (convert_stats );
144
200
}
145
201
146
202
/*
 * Describe the line endings of the working tree file at path, as the
 * textual form of the bits gathered by get_convert_stats_wt().
 *
 * NOTE(review): get_convert_stats_wt() yields 0 for a path it cannot
 * open, so such a path is reported as whatever convert_stats_ascii(0)
 * returns - confirm that this is the desired answer for unreadable files.
 */
const char *get_wt_convert_stats_ascii(const char *path)
{
	return convert_stats_ascii(get_convert_stats_wt(path));
}
155
208
156
209
static int text_eol_is_crlf (void )
@@ -214,16 +267,10 @@ static void check_safe_crlf(const char *path, enum crlf_action crlf_action,
214
267
215
268
static int has_cr_in_index (const char * path )
216
269
{
217
- unsigned long sz ;
218
- void * data ;
219
- int has_cr ;
220
-
221
- data = read_blob_data_from_cache (path , & sz );
222
- if (!data )
223
- return 0 ;
224
- has_cr = memchr (data , '\r' , sz ) != NULL ;
225
- free (data );
226
- return has_cr ;
270
+ unsigned convert_stats ;
271
+ convert_stats = get_convert_stats_sha1 (get_sha1_from_cache (path ),
272
+ CONVERT_STAT_BITS_ANY_CR );
273
+ return convert_stats & CONVERT_STAT_BITS_ANY_CR ;
227
274
}
228
275
229
276
static int will_convert_lf_to_crlf (size_t len , struct text_stat * stats ,
@@ -235,13 +282,13 @@ static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
235
282
if (!stats -> lonelf )
236
283
return 0 ;
237
284
238
- if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF ) {
285
+ if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_CRLF ) {
239
286
/* If we have any CR or CRLF line endings, we do not touch it */
240
287
/* This is the new safer autocrlf-handling */
241
288
if (stats -> lonecr || stats -> crlf )
242
289
return 0 ;
243
290
244
- if (convert_is_binary ( len , stats ) )
291
+ if (stats -> stat_bits & CONVERT_STAT_BITS_BIN )
245
292
return 0 ;
246
293
}
247
294
return 1 ;
@@ -254,7 +301,8 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
254
301
{
255
302
struct text_stat stats ;
256
303
char * dst ;
257
- int convert_crlf_into_lf ;
304
+ int has_crlf_to_convert ;
305
+ unsigned search_only = 0 ;
258
306
259
307
if (crlf_action == CRLF_BINARY ||
260
308
(src && !len ))
@@ -267,12 +315,16 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
267
315
if (!buf && !src )
268
316
return 1 ;
269
317
270
- gather_stats (src , len , & stats );
318
+ if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF )
319
+ search_only = CONVERT_STAT_BITS_BIN ;
320
+
321
+ gather_all_stats (src , len , & stats , search_only );
322
+
271
323
/* Optimization: No CRLF? Nothing to convert, regardless. */
272
- convert_crlf_into_lf = !!stats .crlf ;
324
+ has_crlf_to_convert = !!stats .crlf ;
273
325
274
326
if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF ) {
275
- if (convert_is_binary ( len , & stats ) )
327
+ if (stats . stat_bits & CONVERT_STAT_BITS_BIN )
276
328
return 0 ;
277
329
/*
278
330
* If the file in the index has any CR in it, do not convert.
@@ -281,24 +333,35 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
281
333
if (checksafe == SAFE_CRLF_RENORMALIZE )
282
334
checksafe = SAFE_CRLF_FALSE ;
283
335
else if (has_cr_in_index (path ))
284
- convert_crlf_into_lf = 0 ;
336
+ has_crlf_to_convert = 0 ;
285
337
}
286
338
if (checksafe && len ) {
287
339
struct text_stat new_stats ;
288
340
memcpy (& new_stats , & stats , sizeof (new_stats ));
289
341
/* simulate "git add" */
290
- if (convert_crlf_into_lf ) {
342
+ if (has_crlf_to_convert ) {
291
343
new_stats .lonelf += new_stats .crlf ;
292
344
new_stats .crlf = 0 ;
345
+ /* all crlf, if any, are gone. Update the bits */
346
+ new_stats .stat_bits = stats .stat_bits & CONVERT_STAT_BITS_BIN ;
347
+ if (new_stats .lonelf )
348
+ new_stats .stat_bits |= CONVERT_STAT_BITS_TXT_LF ;
349
+ if (new_stats .lonecr )
350
+ new_stats .stat_bits |= CONVERT_STAT_BITS_ANY_CR ;
293
351
}
294
352
/* simulate "git checkout" */
295
353
if (will_convert_lf_to_crlf (len , & new_stats , crlf_action )) {
296
354
new_stats .crlf += new_stats .lonelf ;
297
355
new_stats .lonelf = 0 ;
356
+ new_stats .stat_bits = stats .stat_bits & CONVERT_STAT_BITS_BIN ;
357
+ if (new_stats .crlf )
358
+ new_stats .stat_bits |= CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_ANY_CR ;
359
+ if (new_stats .lonecr )
360
+ new_stats .stat_bits |= CONVERT_STAT_BITS_ANY_CR ;
298
361
}
299
362
check_safe_crlf (path , crlf_action , & stats , & new_stats , checksafe );
300
363
}
301
- if (!convert_crlf_into_lf )
364
+ if (!has_crlf_to_convert )
302
365
return 0 ;
303
366
304
367
/*
@@ -339,11 +402,15 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
339
402
{
340
403
char * to_free = NULL ;
341
404
struct text_stat stats ;
405
+ unsigned search_only = 0 ;
342
406
343
407
if (!len || output_eol (crlf_action ) != EOL_CRLF )
344
408
return 0 ;
345
409
346
- gather_stats (src , len , & stats );
410
+ if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_CRLF )
411
+ search_only = CONVERT_STAT_BITS_ANY_CR | CONVERT_STAT_BITS_BIN ;
412
+
413
+ gather_all_stats (src , len , & stats , search_only );
347
414
if (!will_convert_lf_to_crlf (len , & stats , crlf_action ))
348
415
return 0 ;
349
416
0 commit comments