uio.utility.files.pickle
File operations with pickles.
"""
File operations with [pickles](https://docs.python.org/3/library/pickle.html).
"""

import pathlib
import pandas

from typing import Optional, List, Union

from ..datasets import pandas as pnd
from ..logs.log import logger


def openPickleAsPandasTable(
    pickleFilePath: Union[str, pathlib.Path]
) -> pandas.DataFrame:
    """
    Read [Pandas](https://pandas.pydata.org) table from provided pickle file
    (*after checking that the file exists and that it is actually a file*).

    The path can be a string, a `pathlib.Path` or any other path-like
    object accepted by `pathlib.Path`.

    Raises `ValueError` if the path does not exist or is not a file.

    Unpickling can execute arbitrary code, so only open pickle files
    that come from a trusted source.

    Example:

    ``` py
    from uio.utility.files import pickle

    pnd = pickle.openPickleAsPandasTable("/path/to/some.pkl")
    #print(pnd.head(15))
    ```
    """
    # pathlib.Path() accepts strings and path objects alike,
    # so no type-specific branching is needed
    filePath = pathlib.Path(pickleFilePath)
    if not filePath.exists():
        raise ValueError(f"The path [{filePath}] does not exist")
    if not filePath.is_file():
        raise ValueError(f"The path [{filePath}] is not a file")
    # NOTE: read_pickle() unpickles the file contents as they are,
    # the file must come from a trusted source
    return pandas.read_pickle(filePath)


def savePandasTableAsPickle(
    pandasTable: pandas.DataFrame,
    pickleFilePath: Union[str, pathlib.Path]
) -> None:
    """
    Save [Pandas](https://pandas.pydata.org) table to a pickle file.

    The path can be a string, a `pathlib.Path` or any other path-like
    object accepted by `pathlib.Path`.

    Raises `ValueError` if something already exists at the provided path
    (*existing files are never overwritten*).

    Example:

    ``` py
    from uio.utility.files import pickle

    pickle.savePandasTableAsPickle(pnd, "/path/to/some.pkl")
    ```
    """
    # pathlib.Path() accepts strings and path objects alike,
    # so no type-specific branching is needed
    filePath = pathlib.Path(pickleFilePath)
    # refuse to overwrite anything that is already there
    if filePath.exists():
        raise ValueError(f"The [{filePath}] file already exists")
    pandasTable.to_pickle(filePath)


def mergePickles(
    picklesToMergePath: Union[str, pathlib.Path],
    resultingPicklePath: Union[None, str, pathlib.Path]
) -> Optional[pandas.DataFrame]:
    """
    Merge several pickle files into one. Looks for pickle files (*`*.pkl`*)
    in the provided folder and its subfolders, reads them
    to [Pandas](https://pandas.pydata.org) tables
    (*with `uio.utility.files.pickle.openPickleAsPandasTable`*)
    and concatenates those into one final Pandas table
    (*using `uio.utility.datasets.pandas.mergeTables`*).

    The files are merged in sorted path order, so the order of records
    in the resulting table does not depend on the file system.

    Saves resulting Pandas table to file (*if provided path is not `None`
    and not an empty string*) and returns `None`, or just returns the table.

    Raises `ValueError` if the input path does not exist or is not a folder,
    if the resulting file already exists (*checked before any pickle
    is read*) or if no pickle files were found.

    Example:

    ``` py
    from uio.utility.files import pickle

    pickle.mergePickles(
        "/path/to/pickles/to/merge/",
        "/path/to/where/to/save/result.pkl"
    )

    # or

    tbl = pickle.mergePickles(
        "/path/to/pickles/to/merge/",
        None
    )
    #print(tbl.head(15))
    ```
    """
    inputPath = pathlib.Path(picklesToMergePath)
    if not inputPath.exists():
        raise ValueError(f"The path [{inputPath}] does not exist")
    if not inputPath.is_dir():
        raise ValueError(f"The [{inputPath}] is not a folder")

    # fail fast: saving refuses to overwrite an existing file, so there is
    # no point in reading and merging everything just to fail at the end
    if resultingPicklePath:
        outputPath = pathlib.Path(resultingPicklePath)
        if outputPath.exists():
            raise ValueError(f"The [{outputPath}] file already exists")

    # glob() yields paths in arbitrary order, sorting makes the merge
    # reproducible
    picklesToMerge = sorted(inputPath.glob("**/*.pkl"))

    filesCount = len(picklesToMerge)
    logger.debug(f"Found files: {filesCount}")
    if filesCount == 0:
        raise ValueError("There are no files in the provided folder")

    frames = []
    for p in picklesToMerge:
        logger.info(f"Merging {p}...")
        tbl = openPickleAsPandasTable(p)
        logger.debug(f"Records in this pickle: {len(tbl)}")
        frames.append(tbl)

    mergedTable = pnd.mergeTables(frames)

    if resultingPicklePath:
        savePandasTableAsPickle(mergedTable, resultingPicklePath)
        return None
    return mergedTable
def
openPickleAsPandasTable(pickleFilePath: Union[str, pathlib.Path]) -> pandas.core.frame.DataFrame:
def openPickleAsPandasTable(
    pickleFilePath: Union[str, pathlib.Path]
) -> pandas.DataFrame:
    """
    Read [Pandas](https://pandas.pydata.org) table from provided pickle file
    (*after checking that the file exists and that it is actually a file*).

    The path can be a string, a `pathlib.Path` or any other path-like
    object accepted by `pathlib.Path`.

    Raises `ValueError` if the path does not exist or is not a file.

    Unpickling can execute arbitrary code, so only open pickle files
    that come from a trusted source.

    Example:

    ``` py
    from uio.utility.files import pickle

    pnd = pickle.openPickleAsPandasTable("/path/to/some.pkl")
    #print(pnd.head(15))
    ```
    """
    # pathlib.Path() accepts strings and path objects alike,
    # so no type-specific branching is needed
    filePath = pathlib.Path(pickleFilePath)
    if not filePath.exists():
        raise ValueError(f"The path [{filePath}] does not exist")
    if not filePath.is_file():
        raise ValueError(f"The path [{filePath}] is not a file")
    # NOTE: read_pickle() unpickles the file contents as they are,
    # the file must come from a trusted source
    return pandas.read_pickle(filePath)
Read Pandas table from provided pickle file (after checking that the file exists and that it is actually a file).
Example:
from uio.utility.files import pickle
pnd = pickle.openPickleAsPandasTable("/path/to/some.pkl")
#print(pnd.head(15))
def
savePandasTableAsPickle( pandasTable: pandas.core.frame.DataFrame, pickleFilePath: Union[str, pathlib.Path]) -> None:
def savePandasTableAsPickle(
    pandasTable: pandas.DataFrame,
    pickleFilePath: Union[str, pathlib.Path]
) -> None:
    """
    Save [Pandas](https://pandas.pydata.org) table to a pickle file.

    The path can be a string, a `pathlib.Path` or any other path-like
    object accepted by `pathlib.Path`.

    Raises `ValueError` if something already exists at the provided path
    (*existing files are never overwritten*).

    Example:

    ``` py
    from uio.utility.files import pickle

    pickle.savePandasTableAsPickle(pnd, "/path/to/some.pkl")
    ```
    """
    # pathlib.Path() accepts strings and path objects alike,
    # so no type-specific branching is needed
    filePath = pathlib.Path(pickleFilePath)
    # refuse to overwrite anything that is already there
    if filePath.exists():
        raise ValueError(f"The [{filePath}] file already exists")
    pandasTable.to_pickle(filePath)
Save Pandas table to a pickle file.
Example:
from uio.utility.files import pickle
pickle.savePandasTableAsPickle(pnd, "/path/to/some.pkl")
def
mergePickles( picklesToMergePath: Union[str, pathlib.Path], resultingPicklePath: Union[NoneType, str, pathlib.Path]) -> Optional[pandas.core.frame.DataFrame]:
def mergePickles(
    picklesToMergePath: Union[str, pathlib.Path],
    resultingPicklePath: Union[None, str, pathlib.Path]
) -> Optional[pandas.DataFrame]:
    """
    Merge several pickle files into one. Looks for pickle files (*`*.pkl`*)
    in the provided folder and its subfolders, reads them
    to [Pandas](https://pandas.pydata.org) tables
    (*with `uio.utility.files.pickle.openPickleAsPandasTable`*)
    and concatenates those into one final Pandas table
    (*using `uio.utility.datasets.pandas.mergeTables`*).

    The files are merged in sorted path order, so the order of records
    in the resulting table does not depend on the file system.

    Saves resulting Pandas table to file (*if provided path is not `None`
    and not an empty string*) and returns `None`, or just returns the table.

    Raises `ValueError` if the input path does not exist or is not a folder,
    if the resulting file already exists (*checked before any pickle
    is read*) or if no pickle files were found.

    Example:

    ``` py
    from uio.utility.files import pickle

    pickle.mergePickles(
        "/path/to/pickles/to/merge/",
        "/path/to/where/to/save/result.pkl"
    )

    # or

    tbl = pickle.mergePickles(
        "/path/to/pickles/to/merge/",
        None
    )
    #print(tbl.head(15))
    ```
    """
    inputPath = pathlib.Path(picklesToMergePath)
    if not inputPath.exists():
        raise ValueError(f"The path [{inputPath}] does not exist")
    if not inputPath.is_dir():
        raise ValueError(f"The [{inputPath}] is not a folder")

    # fail fast: saving refuses to overwrite an existing file, so there is
    # no point in reading and merging everything just to fail at the end
    if resultingPicklePath:
        outputPath = pathlib.Path(resultingPicklePath)
        if outputPath.exists():
            raise ValueError(f"The [{outputPath}] file already exists")

    # glob() yields paths in arbitrary order, sorting makes the merge
    # reproducible
    picklesToMerge = sorted(inputPath.glob("**/*.pkl"))

    filesCount = len(picklesToMerge)
    logger.debug(f"Found files: {filesCount}")
    if filesCount == 0:
        raise ValueError("There are no files in the provided folder")

    frames = []
    for p in picklesToMerge:
        logger.info(f"Merging {p}...")
        tbl = openPickleAsPandasTable(p)
        logger.debug(f"Records in this pickle: {len(tbl)}")
        frames.append(tbl)

    mergedTable = pnd.mergeTables(frames)

    if resultingPicklePath:
        savePandasTableAsPickle(mergedTable, resultingPicklePath)
        return None
    return mergedTable
Merge several pickle files into one. Looks for pickle files (`*.pkl`) in the provided folder and its subfolders, reads them to Pandas tables (with `uio.utility.files.pickle.openPickleAsPandasTable`) and concatenates those into one final Pandas table (using `uio.utility.datasets.pandas.mergeTables`).
Saves the resulting Pandas table to a file (if the provided path is not `None`) or just returns it.
Example:
from uio.utility.files import pickle
pickle.mergePickles(
"/path/to/pickles/to/merge/",
"/path/to/where/to/save/result.pkl"
)
# or
tbl = pickle.mergePickles(
"/path/to/pickles/to/merge/",
None
)
#print(tbl.head(15))