Skip to content

Commit c89febd

Browse files
authored
Fix FastParquetImpl.write for non-existent file
`PyArrowImpl` already correctly opens a non-existent file for writing, but with `engine='fastparquet'` this fails, e.g. for a GCS URL:

```
[nav] In [1]: pd.DataFrame().to_parquet('gs://city_data/test/blah.parquet')
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-1-dde78378baaa> in <module>
----> 1 pd.DataFrame().to_parquet('gs://city_data/test/blah.parquet')

~/venvs/model/lib/python3.7/site-packages/pandas/core/frame.py in to_parquet(self, fname, engine, compression, index, partition_cols, **kwargs)
   2215     index=index,
   2216     partition_cols=partition_cols,
-> 2217     **kwargs
   2218 )
   2219

~/venvs/model/lib/python3.7/site-packages/pandas/io/parquet.py in to_parquet(df, path, engine, compression, index, partition_cols, **kwargs)
    250     index=index,
    251     partition_cols=partition_cols,
--> 252     **kwargs
    253 )
    254

~/venvs/model/lib/python3.7/site-packages/pandas/io/parquet.py in write(self, df, path, compression, index, partition_cols, **kwargs)
    171     kwargs["open_with"] = lambda path, _: path
    172 else:
--> 173     path, _, _, _ = get_filepath_or_buffer(path)
    174
    175 with catch_warnings(record=True):

~/venvs/model/lib/python3.7/site-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
    212
    213 return gcs.get_filepath_or_buffer(
--> 214     filepath_or_buffer, encoding=encoding, compression=compression, mode=mode
    215 )
    216

~/venvs/model/lib/python3.7/site-packages/pandas/io/gcs.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
     15
     16 fs = gcsfs.GCSFileSystem()
---> 17 filepath_or_buffer = fs.open(filepath_or_buffer, mode)
     18 return filepath_or_buffer, None, compression, True

<decorator-gen-147> in open(self, path, mode, block_size, acl, consistency, metadata)

~/venvs/model/lib/python3.7/site-packages/gcsfs/core.py in _tracemethod(f, self, *args, **kwargs)
     51 logger.log(logging.DEBUG - 1, tb_io.getvalue())
     52
---> 53 return f(self, *args, **kwargs)
     54
     55

~/venvs/model/lib/python3.7/site-packages/gcsfs/core.py in open(self, path, mode, block_size, acl, consistency, metadata)
   1148 if 'b' in mode:
   1149     return GCSFile(self, path, mode, block_size, consistency=const,
-> 1150                    metadata=metadata)
   1151 else:
   1152     mode = mode.replace('t', '') + 'b'

<decorator-gen-150> in __init__(self, gcsfs, path, mode, block_size, acl, consistency, metadata)

~/venvs/model/lib/python3.7/site-packages/gcsfs/core.py in _tracemethod(f, self, *args, **kwargs)
     51 logger.log(logging.DEBUG - 1, tb_io.getvalue())
     52
---> 53 return f(self, *args, **kwargs)
     54
     55

~/venvs/model/lib/python3.7/site-packages/gcsfs/core.py in __init__(self, gcsfs, path, mode, block_size, acl, consistency, metadata)
   1276     raise NotImplementedError('File mode not supported')
   1277 if mode == 'rb':
-> 1278     self.details = gcsfs.info(path)
   1279     self.size = self.details['size']
   1280 else:

<decorator-gen-136> in info(self, path)

~/venvs/model/lib/python3.7/site-packages/gcsfs/core.py in _tracemethod(f, self, *args, **kwargs)
     51 logger.log(logging.DEBUG - 1, tb_io.getvalue())
     52
---> 53 return f(self, *args, **kwargs)
     54
     55

~/venvs/model/lib/python3.7/site-packages/gcsfs/core.py in info(self, path)
    863
    864 try:
--> 865     return self._get_object(path)
    866 except FileNotFoundError:
    867     logger.debug("info FileNotFound at path: %s", path)

<decorator-gen-122> in _get_object(self, path)

~/venvs/model/lib/python3.7/site-packages/gcsfs/core.py in _tracemethod(f, self, *args, **kwargs)
     51 logger.log(logging.DEBUG - 1, tb_io.getvalue())
     52
---> 53 return f(self, *args, **kwargs)
     54
     55

~/venvs/model/lib/python3.7/site-packages/gcsfs/core.py in _get_object(self, path)
    539     raise FileNotFoundError(path)
    540
--> 541 result = self._process_object(bucket, self._call('GET', 'b/{}/o/{}', bucket, key).json())
    542
    543 return result

<decorator-gen-121> in _call(self, method, path, *args, **kwargs)

~/venvs/model/lib/python3.7/site-packages/gcsfs/core.py in _tracemethod(f, self, *args, **kwargs)
     51 logger.log(logging.DEBUG - 1, tb_io.getvalue())
     52
---> 53 return f(self, *args, **kwargs)
     54
     55

~/venvs/model/lib/python3.7/site-packages/gcsfs/core.py in _call(self, method, path, *args, **kwargs)
    482     r = self.session.request(method, path,
    483                              params=kwargs, json=json, headers=headers, data=data, timeout=self.requests_timeout)
--> 484     validate_response(r, path)
    485     break
    486 except (HttpError, RequestException, RateLimitException, GoogleAuthError) as e:

~/venvs/model/lib/python3.7/site-packages/gcsfs/core.py in validate_response(r, path)
    156
    157 if r.status_code == 404:
--> 158     raise FileNotFoundError(path)
    159 elif r.status_code == 403:
    160     raise IOError("Forbidden: %s\n%s" % (path, msg))

FileNotFoundError: https://www.googleapis.com/storage/v1/b/city_data/o/test%2Fblah.parquet
```
1 parent 2d65e38 commit c89febd

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

pandas/io/parquet.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,7 @@ def write(
170  170        # And pass the opened s3file to the fastparquet internal impl.
171  171        kwargs["open_with"] = lambda path, _: path
172  172    else:
173       -     path, _, _, _ = get_filepath_or_buffer(path)
     173  +     path, _, _, _ = get_filepath_or_buffer(path, mode="wb")
174  174
175  175    with catch_warnings(record=True):
176  176        self.api.write(

0 commit comments

Comments
 (0)