Edit on GitHub

utils.files.pickle

File operations with pickles.

  1"""
  2File operations with [pickles](https://docs.python.org/3/library/pickle.html).
  3"""
  4
  5import pathlib
  6import pandas
  7
  8from typing import Optional, List, Union
  9
 10from ..datasets import pandas as pnd
 11from ..files import file as fl
 12from ..logs.log import logger
 13
 14
 15def openPickleAsPandasTable(
 16    pickleFilePath: Union[str, pathlib.Path]
 17) -> pandas.DataFrame:
 18    """
 19    Read [Pandas](https://pandas.pydata.org) table from provided pickle file
 20    (*after checking that the file exists and that it is actually a file*).
 21
 22    Example:
 23
 24    ``` py
 25    from phab.utils.files import pickle
 26
 27    pnd = pickle.openPickleAsPandasTable("/path/to/some.pkl")
 28    #print(pnd.head(15))
 29    ```
 30    """
 31    pickleFile: Optional[pathlib.Path] = fl.fileExists(pickleFilePath)
 32    if pickleFile is None:
 33        raise ValueError(
 34            f"Provided path to [{pickleFilePath}] seems to be wrong"
 35        )
 36    else:
 37        return pandas.read_pickle(pickleFile)
 38
 39
 40def savePandasTableAsPickle(
 41    pandasTable: pandas.DataFrame,
 42    pickleFilePath: Union[str, pathlib.Path]
 43) -> None:
 44    """
 45    Save [Pandas](https://pandas.pydata.org) table to a pickle file.
 46
 47    Example:
 48
 49    ``` py
 50    from phab.utils.files import pickle
 51
 52    savePandasTableAsPickle(pnd, "/path/to/some.pkl")
 53    ```
 54    """
 55    filePath: pathlib.Path = pathlib.Path()
 56    if isinstance(pickleFilePath, str):
 57        filePath = pathlib.Path(pickleFilePath)
 58    else:
 59        filePath = pickleFilePath
 60    if filePath.exists():
 61        raise ValueError(f"The [{filePath}] file already exists")
 62    pandasTable.to_pickle(filePath)
 63
 64
 65def mergePickles(
 66    picklesToMergePath: Union[str, pathlib.Path],
 67    resultingPicklePath: Union[None, str, pathlib.Path]
 68) -> Optional[pandas.DataFrame]:
 69    """
 70    Merge several pickle files into one. Looks for pickle files (*`*.pkl`*)
 71    in the provided folder, reads them to [Pandas](https://pandas.pydata.org)
 72    tables (*with `utils.files.pickle.openPickleAsPandasTable`*)
 73    and concatenates those into one final Pandas table
 74    (*using `utils.datasets.pandas.mergeTables`*).
 75
 76    Saves resulting Pandas table to file (*if provided path is not `None`*)
 77    or just returns it.
 78
 79    Example:
 80
 81    ``` py
 82    from phab.utils.files import pickle
 83
 84    pickle.mergePickles(
 85        "/path/to/pickles/to/merge/",
 86        "/path/to/where/to/save/result.pkl"
 87    )
 88
 89    # or
 90
 91    tbl = pickle.mergePickles(
 92        "/path/to/pickles/to/merge/",
 93        None
 94    )
 95    #print(tbl.head(15))
 96    ```
 97    """
 98    picklesToMerge = None
 99    inputPath: Optional[pathlib.Path] = fl.directoryExists(picklesToMergePath)
100    if inputPath is None:
101        raise ValueError(
102            f"Provided path to [{picklesToMergePath}] seems to be wrong"
103        )
104    else:
105        picklesToMerge = list(inputPath.glob("**/*.pkl"))
106
107    frames = []
108
109    filesCount = len(picklesToMerge)
110    logger.debug(f"Found files: {filesCount}")
111    if filesCount == 0:
112        raise ValueError("There are no files in the provided folder")
113    # elif filesCount == 1:
114    #     raise ValueError(
115    #         "[ERROR] There is only one file in the provided folder"
116    #     )
117    else:
118        for p in picklesToMerge:
119            logger.info(f"Merging {p}...")
120            tbl = openPickleAsPandasTable(p)
121            logger.debug(f"Records in this pickle: {len(tbl)}")
122            frames.append(tbl)
123
124    mergedTable = pnd.mergeTables(frames)
125
126    if resultingPicklePath:
127        savePandasTableAsPickle(mergedTable, resultingPicklePath)
128        return None
129    else:
130        return mergedTable
def openPickleAsPandasTable(pickleFilePath: Union[str, pathlib.Path]) -> pandas.core.frame.DataFrame:
def openPickleAsPandasTable(
    pickleFilePath: Union[str, pathlib.Path]
) -> pandas.DataFrame:
    """
    Load a [Pandas](https://pandas.pydata.org) table from a pickle file.

    The provided path is first validated with `fl.fileExists`; only when
    that helper returns a path (*and not `None`*) is the file handed over
    to `pandas.read_pickle`.

    Unpickling can execute arbitrary code, so only open pickle files
    that come from a trusted source.

    Raises:

    - `ValueError`: if `fl.fileExists` returns `None` for the provided path.

    Example:

    ``` py
    from phab.utils.files import pickle

    pnd = pickle.openPickleAsPandasTable("/path/to/some.pkl")
    #print(pnd.head(15))
    ```
    """
    checkedPath: Optional[pathlib.Path] = fl.fileExists(pickleFilePath)
    # guard clause: nothing to read if the helper did not confirm the file
    if checkedPath is None:
        raise ValueError(
            f"Provided path to [{pickleFilePath}] seems to be wrong"
        )
    return pandas.read_pickle(checkedPath)

Read Pandas table from provided pickle file (after checking that the file exists and that it is actually a file).

Example:

from phab.utils.files import pickle

pnd = pickle.openPickleAsPandasTable("/path/to/some.pkl")
#print(pnd.head(15))
def savePandasTableAsPickle( pandasTable: pandas.core.frame.DataFrame, pickleFilePath: Union[str, pathlib.Path]) -> None:
def savePandasTableAsPickle(
    pandasTable: pandas.DataFrame,
    pickleFilePath: Union[str, pathlib.Path]
) -> None:
    """
    Save [Pandas](https://pandas.pydata.org) table to a new pickle file.
    An already existing path is never overwritten.

    Raises:

    - `ValueError`: if something already exists at the provided path.

    Example:

    ``` py
    from phab.utils.files import pickle

    pickle.savePandasTableAsPickle(pnd, "/path/to/some.pkl")
    ```
    """
    # pathlib.Path() accepts strings and path objects alike,
    # so there is no need to branch on the argument type
    filePath = pathlib.Path(pickleFilePath)
    if filePath.exists():
        raise ValueError(f"The [{filePath}] file already exists")
    pandasTable.to_pickle(filePath)

Save Pandas table to a pickle file.

Example:

from phab.utils.files import pickle

pickle.savePandasTableAsPickle(pnd, "/path/to/some.pkl")
def mergePickles( picklesToMergePath: Union[str, pathlib.Path], resultingPicklePath: Union[NoneType, str, pathlib.Path]) -> Optional[pandas.core.frame.DataFrame]:
 66def mergePickles(
 67    picklesToMergePath: Union[str, pathlib.Path],
 68    resultingPicklePath: Union[None, str, pathlib.Path]
 69) -> Optional[pandas.DataFrame]:
 70    """
 71    Merge several pickle files into one. Looks for pickle files (*`*.pkl`*)
 72    in the provided folder, reads them to [Pandas](https://pandas.pydata.org)
 73    tables (*with `utils.files.pickle.openPickleAsPandasTable`*)
 74    and concatenates those into one final Pandas table
 75    (*using `utils.datasets.pandas.mergeTables`*).
 76
 77    Saves resulting Pandas table to file (*if provided path is not `None`*)
 78    or just returns it.
 79
 80    Example:
 81
 82    ``` py
 83    from phab.utils.files import pickle
 84
 85    pickle.mergePickles(
 86        "/path/to/pickles/to/merge/",
 87        "/path/to/where/to/save/result.pkl"
 88    )
 89
 90    # or
 91
 92    tbl = pickle.mergePickles(
 93        "/path/to/pickles/to/merge/",
 94        None
 95    )
 96    #print(tbl.head(15))
 97    ```
 98    """
 99    picklesToMerge = None
100    inputPath: Optional[pathlib.Path] = fl.directoryExists(picklesToMergePath)
101    if inputPath is None:
102        raise ValueError(
103            f"Provided path to [{picklesToMergePath}] seems to be wrong"
104        )
105    else:
106        picklesToMerge = list(inputPath.glob("**/*.pkl"))
107
108    frames = []
109
110    filesCount = len(picklesToMerge)
111    logger.debug(f"Found files: {filesCount}")
112    if filesCount == 0:
113        raise ValueError("There are no files in the provided folder")
114    # elif filesCount == 1:
115    #     raise ValueError(
116    #         "[ERROR] There is only one file in the provided folder"
117    #     )
118    else:
119        for p in picklesToMerge:
120            logger.info(f"Merging {p}...")
121            tbl = openPickleAsPandasTable(p)
122            logger.debug(f"Records in this pickle: {len(tbl)}")
123            frames.append(tbl)
124
125    mergedTable = pnd.mergeTables(frames)
126
127    if resultingPicklePath:
128        savePandasTableAsPickle(mergedTable, resultingPicklePath)
129        return None
130    else:
131        return mergedTable

Merge several pickle files into one. Looks for pickle files (*.pkl) in the provided folder and, recursively, in all of its subfolders, reads them to Pandas tables (with utils.files.pickle.openPickleAsPandasTable) and concatenates those into one final Pandas table (using utils.datasets.pandas.mergeTables).

Saves resulting Pandas table to file (if provided path is not None) or just returns it.

Example:

from phab.utils.files import pickle

pickle.mergePickles(
    "/path/to/pickles/to/merge/",
    "/path/to/where/to/save/result.pkl"
)

# or

tbl = pickle.mergePickles(
    "/path/to/pickles/to/merge/",
    None
)
#print(tbl.head(15))