Edit on GitHub

uio.utility.files.pickle

File operations with pickles.

  1"""
  2File operations with [pickles](https://docs.python.org/3/library/pickle.html).
  3"""
  4
  5import pathlib
  6import pandas
  7
  8from typing import Optional, List, Union
  9
 10from ..datasets import pandas as pnd
 11from ..logs.log import logger
 12
 13
 14def openPickleAsPandasTable(
 15    pickleFilePath: Union[str, pathlib.Path]
 16) -> pandas.DataFrame:
 17    """
 18    Read [Pandas](https://pandas.pydata.org) table from provided pickle file
 19    (*after checking that the file exists and that it is actually a file*).
 20
 21    Example:
 22
 23    ``` py
 24    from uio.utility.files import pickle
 25
 26    pnd = pickle.openPickleAsPandasTable("/path/to/some.pkl")
 27    #print(pnd.head(15))
 28    ```
 29    """
 30    filePath: pathlib.Path = pathlib.Path()
 31    if isinstance(pickleFilePath, str):
 32        filePath = pathlib.Path(pickleFilePath)
 33    else:
 34        filePath = pickleFilePath
 35    if not filePath.exists():
 36        raise ValueError(f"The path [{filePath}] does not exist")
 37    if not filePath.is_file():
 38        raise ValueError(f"The path [{filePath}] is not a file")
 39    return pandas.read_pickle(filePath)
 40
 41
 42def savePandasTableAsPickle(
 43    pandasTable: pandas.DataFrame,
 44    pickleFilePath: Union[str, pathlib.Path]
 45) -> None:
 46    """
 47    Save [Pandas](https://pandas.pydata.org) table to a pickle file.
 48
 49    Example:
 50
 51    ``` py
 52    from uio.utility.files import pickle
 53
 54    savePandasTableAsPickle(pnd, "/path/to/some.pkl")
 55    ```
 56    """
 57    filePath: pathlib.Path = pathlib.Path()
 58    if isinstance(pickleFilePath, str):
 59        filePath = pathlib.Path(pickleFilePath)
 60    else:
 61        filePath = pickleFilePath
 62    if filePath.exists():
 63        raise ValueError(f"The [{filePath}] file already exists")
 64    pandasTable.to_pickle(filePath)
 65
 66
 67def mergePickles(
 68    picklesToMergePath: Union[str, pathlib.Path],
 69    resultingPicklePath: Union[None, str, pathlib.Path]
 70) -> Optional[pandas.DataFrame]:
 71    """
 72    Merge several pickle files into one. Looks for pickle files (*`*.pkl`*)
 73    in the provided folder, reads them to [Pandas](https://pandas.pydata.org)
 74    tables (*with `uio.utility.files.pickle.openPickleAsPandasTable`*)
 75    and concatenates those into one final Pandas table
 76    (*using `uio.utility.datasets.pandas.mergeTables`*).
 77
 78    Saves resulting Pandas table to file (*if provided path is not `None`*)
 79    or just returns it.
 80
 81    Example:
 82
 83    ``` py
 84    from uio.utility.files import pickle
 85
 86    pickle.mergePickles(
 87        "/path/to/pickles/to/merge/",
 88        "/path/to/where/to/save/result.pkl"
 89    )
 90
 91    # or
 92
 93    tbl = pickle.mergePickles(
 94        "/path/to/pickles/to/merge/",
 95        None
 96    )
 97    #print(tbl.head(15))
 98    ```
 99    """
100    inputPath: pathlib.Path = pathlib.Path()
101    if isinstance(picklesToMergePath, str):
102        inputPath = pathlib.Path(picklesToMergePath)
103    else:
104        inputPath = picklesToMergePath
105
106    if not inputPath.exists():
107        raise ValueError(f"The path [{inputPath}] does not exist")
108    if not inputPath.is_dir():
109        raise ValueError(f"The [{inputPath}] is not a folder")
110
111    picklesToMerge = list(inputPath.glob("**/*.pkl"))
112
113    frames = []
114
115    filesCount = len(picklesToMerge)
116    logger.debug(f"Found files: {filesCount}")
117    if filesCount == 0:
118        raise ValueError("There are no files in the provided folder")
119    # elif filesCount == 1:
120    #     raise ValueError(
121    #         "[ERROR] There is only one file in the provided folder"
122    #     )
123    else:
124        for p in picklesToMerge:
125            logger.info(f"Merging {p}...")
126            tbl = openPickleAsPandasTable(p)
127            logger.debug(f"Records in this pickle: {len(tbl)}")
128            frames.append(tbl)
129
130    mergedTable = pnd.mergeTables(frames)
131
132    if resultingPicklePath:
133        savePandasTableAsPickle(mergedTable, resultingPicklePath)
134        return None
135    else:
136        return mergedTable
def openPickleAsPandasTable(pickleFilePath: Union[str, pathlib.Path]) -> pandas.core.frame.DataFrame:
def openPickleAsPandasTable(
    pickleFilePath: Union[str, pathlib.Path]
) -> pandas.DataFrame:
    """
    Read [Pandas](https://pandas.pydata.org) table from provided pickle file
    (*after checking that the path exists and that it is actually a file*).

    Arguments:

    - `pickleFilePath`: path to the pickle file, as a string,
    a `pathlib.Path` or any other path-like object.

    Returns whatever `pandas.read_pickle` loads from that file.

    Raises `ValueError` if the path does not exist or is not a regular file.

    **Warning**: unpickling can execute arbitrary code, so only open
    pickle files that come from a trusted source.

    Example:

    ``` py
    from uio.utility.files import pickle

    tbl = pickle.openPickleAsPandasTable("/path/to/some.pkl")
    #print(tbl.head(15))
    ```
    """
    # pathlib.Path() accepts str, Path and any os.PathLike alike,
    # so there is no need to branch on the argument type
    filePath = pathlib.Path(pickleFilePath)
    if not filePath.exists():
        raise ValueError(f"The path [{filePath}] does not exist")
    if not filePath.is_file():
        raise ValueError(f"The path [{filePath}] is not a file")
    # NOTE: pickle is not a safe format for untrusted input
    return pandas.read_pickle(filePath)

Read a Pandas table from the provided pickle file (after checking that the path exists and that it is actually a file).

Example:

from uio.utility.files import pickle

pnd = pickle.openPickleAsPandasTable("/path/to/some.pkl")
#print(pnd.head(15))
def savePandasTableAsPickle( pandasTable: pandas.core.frame.DataFrame, pickleFilePath: Union[str, pathlib.Path]) -> None:
def savePandasTableAsPickle(
    pandasTable: pandas.DataFrame,
    pickleFilePath: Union[str, pathlib.Path]
) -> None:
    """
    Save [Pandas](https://pandas.pydata.org) table to a pickle file.
    An existing file is never overwritten.

    Arguments:

    - `pandasTable`: the table to save;
    - `pickleFilePath`: where to save it, as a string, a `pathlib.Path`
    or any other path-like object.

    Raises `ValueError` if something already exists at the provided path.

    Example:

    ``` py
    from uio.utility.files import pickle

    pickle.savePandasTableAsPickle(tbl, "/path/to/some.pkl")
    ```
    """
    # pathlib.Path() accepts str, Path and any os.PathLike alike,
    # so there is no need to branch on the argument type
    filePath = pathlib.Path(pickleFilePath)
    if filePath.exists():
        raise ValueError(f"The [{filePath}] file already exists")
    pandasTable.to_pickle(filePath)

Save Pandas table to a pickle file.

Example:

from uio.utility.files import pickle

pickle.savePandasTableAsPickle(pnd, "/path/to/some.pkl")
def mergePickles( picklesToMergePath: Union[str, pathlib.Path], resultingPicklePath: Union[NoneType, str, pathlib.Path]) -> Optional[pandas.core.frame.DataFrame]:
 68def mergePickles(
 69    picklesToMergePath: Union[str, pathlib.Path],
 70    resultingPicklePath: Union[None, str, pathlib.Path]
 71) -> Optional[pandas.DataFrame]:
 72    """
 73    Merge several pickle files into one. Looks for pickle files (*`*.pkl`*)
 74    in the provided folder, reads them to [Pandas](https://pandas.pydata.org)
 75    tables (*with `uio.utility.files.pickle.openPickleAsPandasTable`*)
 76    and concatenates those into one final Pandas table
 77    (*using `uio.utility.datasets.pandas.mergeTables`*).
 78
 79    Saves resulting Pandas table to file (*if provided path is not `None`*)
 80    or just returns it.
 81
 82    Example:
 83
 84    ``` py
 85    from uio.utility.files import pickle
 86
 87    pickle.mergePickles(
 88        "/path/to/pickles/to/merge/",
 89        "/path/to/where/to/save/result.pkl"
 90    )
 91
 92    # or
 93
 94    tbl = pickle.mergePickles(
 95        "/path/to/pickles/to/merge/",
 96        None
 97    )
 98    #print(tbl.head(15))
 99    ```
100    """
101    inputPath: pathlib.Path = pathlib.Path()
102    if isinstance(picklesToMergePath, str):
103        inputPath = pathlib.Path(picklesToMergePath)
104    else:
105        inputPath = picklesToMergePath
106
107    if not inputPath.exists():
108        raise ValueError(f"The path [{inputPath}] does not exist")
109    if not inputPath.is_dir():
110        raise ValueError(f"The [{inputPath}] is not a folder")
111
112    picklesToMerge = list(inputPath.glob("**/*.pkl"))
113
114    frames = []
115
116    filesCount = len(picklesToMerge)
117    logger.debug(f"Found files: {filesCount}")
118    if filesCount == 0:
119        raise ValueError("There are no files in the provided folder")
120    # elif filesCount == 1:
121    #     raise ValueError(
122    #         "[ERROR] There is only one file in the provided folder"
123    #     )
124    else:
125        for p in picklesToMerge:
126            logger.info(f"Merging {p}...")
127            tbl = openPickleAsPandasTable(p)
128            logger.debug(f"Records in this pickle: {len(tbl)}")
129            frames.append(tbl)
130
131    mergedTable = pnd.mergeTables(frames)
132
133    if resultingPicklePath:
134        savePandasTableAsPickle(mergedTable, resultingPicklePath)
135        return None
136    else:
137        return mergedTable

Merge several pickle files into one. Looks for pickle files (*.pkl) in the provided folder and, recursively, in its subfolders, reads them into Pandas tables (with uio.utility.files.pickle.openPickleAsPandasTable) and concatenates those into one final Pandas table (using uio.utility.datasets.pandas.mergeTables).

Saves the resulting Pandas table to a file and returns None (if the provided path is not None), or just returns the table.

Example:

from uio.utility.files import pickle

pickle.mergePickles(
    "/path/to/pickles/to/merge/",
    "/path/to/where/to/save/result.pkl"
)

# or

tbl = pickle.mergePickles(
    "/path/to/pickles/to/merge/",
    None
)
#print(tbl.head(15))