|
4 | 4 |
|
5 | 5 | import pandas as pd
|
6 | 6 | import gzip
|
7 |
| -import os |
8 | 7 |
|
9 | 8 | COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27), (27, 32), (32, 39),
|
10 | 9 | (39, 45), (45, 50), (50, 55), (55, 64), (64, 70), (70, 75)]
|
@@ -93,11 +92,17 @@ def read_bsrn(filename):
|
93 | 92 | if str(filename).endswith('.gz'): # check if file is a gzipped (.gz) file
|
94 | 93 | with gzip.open(filename, 'rt') as f:
|
95 | 94 | for num, line in enumerate(f):
|
| 95 | + if num==1: # Get month and year from the 2nd line |
| 96 | + start_date = pd.Timestamp(year=int(line[7:11]), |
| 97 | + month=int(line[3:6]), day=1) |
96 | 98 | if line.startswith('*'): # Find start of all logical records
|
97 | 99 | line_no_dict[line[2:6]] = num # key is 4 digit LR number
|
98 | 100 | else:
|
99 | 101 | with open(filename, 'r') as f:
|
100 | 102 | for num, line in enumerate(f):
|
| 103 | + if num==1: # Get month and year from the 2nd line |
| 104 | + start_date = pd.Timestamp(year=int(line[7:11]), |
| 105 | + month=int(line[3:6]), day=1) |
101 | 106 | if line.startswith('*'): # Find start of all logical records
|
102 | 107 | line_no_dict[line[2:6]] = num
|
103 | 108 |
|
@@ -129,8 +134,7 @@ def read_bsrn(filename):
|
129 | 134 | data['minute'] = data['minute'].astype('Int64')
|
130 | 135 |
|
131 | 136 | # Set datetime index and localize to UTC
|
132 |
| - basename = os.path.basename(filename) # get month and year from filename |
133 |
| - data.index = (pd.to_datetime(basename[3:7], format='%m%y') |
| 137 | + data.index = (start_date |
134 | 138 | + pd.to_timedelta(data['day']-1, unit='d')
|
135 | 139 | + pd.to_timedelta(data['minute'], unit='min'))
|
136 | 140 |
|
|
0 commit comments