utils.files.pickle
File operations with pickles.
"""
File operations with [pickles](https://docs.python.org/3/library/pickle.html).
"""

import pathlib
import pandas

from typing import Optional, List, Union

from ..datasets import pandas as pnd
from ..files import file as fl
from ..logs.log import logger


def openPickleAsPandasTable(
    pickleFilePath: Union[str, pathlib.Path]
) -> pandas.DataFrame:
    """
    Read [Pandas](https://pandas.pydata.org) table from provided pickle file
    (*after checking that the file exists and that it is actually a file*).

    Raises `ValueError` if the path check (*`fileExists`*) returns `None`.

    Unpickling can execute arbitrary code, so only open pickle files
    that come from a trusted source.

    Example:

    ``` py
    from phab.utils.files import pickle

    tbl = pickle.openPickleAsPandasTable("/path/to/some.pkl")
    #print(tbl.head(15))
    ```
    """
    pickleFile: Optional[pathlib.Path] = fl.fileExists(pickleFilePath)
    if pickleFile is None:
        raise ValueError(
            f"Provided path to [{pickleFilePath}] seems to be wrong"
        )
    return pandas.read_pickle(pickleFile)


def savePandasTableAsPickle(
    pandasTable: pandas.DataFrame,
    pickleFilePath: Union[str, pathlib.Path]
) -> None:
    """
    Save [Pandas](https://pandas.pydata.org) table to a pickle file.

    Never overwrites: raises `ValueError` if something already exists
    at the provided path.

    Example:

    ``` py
    from phab.utils.files import pickle

    pickle.savePandasTableAsPickle(tbl, "/path/to/some.pkl")
    ```
    """
    # pathlib.Path() accepts both strings and existing Path objects
    filePath = pathlib.Path(pickleFilePath)
    if filePath.exists():
        raise ValueError(f"The [{filePath}] file already exists")
    pandasTable.to_pickle(filePath)


def mergePickles(
    picklesToMergePath: Union[str, pathlib.Path],
    resultingPicklePath: Union[None, str, pathlib.Path]
) -> Optional[pandas.DataFrame]:
    """
    Merge several pickle files into one. Looks for pickle files (*`*.pkl`*)
    in the provided folder and all of its subfolders (*in sorted path order,
    so the result is reproducible*), reads them
    to [Pandas](https://pandas.pydata.org) tables
    (*with `utils.files.pickle.openPickleAsPandasTable`*)
    and concatenates those into one final Pandas table
    (*using `utils.datasets.pandas.mergeTables`*).

    Saves resulting Pandas table to file (*if provided path is not `None`
    and not an empty string*) and returns `None`, or just returns the table.

    Raises `ValueError` if the folder check (*`directoryExists`*)
    returns `None` or if no `*.pkl` files are found.

    Example:

    ``` py
    from phab.utils.files import pickle

    pickle.mergePickles(
        "/path/to/pickles/to/merge/",
        "/path/to/where/to/save/result.pkl"
    )

    # or

    tbl = pickle.mergePickles(
        "/path/to/pickles/to/merge/",
        None
    )
    #print(tbl.head(15))
    ```
    """
    inputPath: Optional[pathlib.Path] = fl.directoryExists(picklesToMergePath)
    if inputPath is None:
        raise ValueError(
            f"Provided path to [{picklesToMergePath}] seems to be wrong"
        )

    # sorted, so that the order of merging (and so the order of rows)
    # does not depend on the order in which the filesystem lists files
    picklesToMerge = sorted(inputPath.glob("**/*.pkl"))

    filesCount = len(picklesToMerge)
    logger.debug(f"Found files: {filesCount}")
    if filesCount == 0:
        raise ValueError("There are no files in the provided folder")

    frames: List[pandas.DataFrame] = []
    for p in picklesToMerge:
        logger.info(f"Merging {p}...")
        tbl = openPickleAsPandasTable(p)
        logger.debug(f"Records in this pickle: {len(tbl)}")
        frames.append(tbl)

    mergedTable = pnd.mergeTables(frames)

    # truthiness check: both None and "" mean "do not save, return the table"
    if resultingPicklePath:
        savePandasTableAsPickle(mergedTable, resultingPicklePath)
        return None
    return mergedTable
def
openPickleAsPandasTable(pickleFilePath: Union[str, pathlib.Path]) -> pandas.core.frame.DataFrame:
def openPickleAsPandasTable(
    pickleFilePath: Union[str, pathlib.Path]
) -> pandas.DataFrame:
    """
    Load a [Pandas](https://pandas.pydata.org) table from a pickle file.

    The path is first passed through `fileExists` (*which is expected
    to confirm that the path exists and is actually a file*); if that check
    returns `None`, a `ValueError` is raised, otherwise the file is read
    with `pandas.read_pickle`.

    Unpickling can execute arbitrary code, so only open pickle files
    that come from a trusted source.

    Example:

    ``` py
    from phab.utils.files import pickle

    tbl = pickle.openPickleAsPandasTable("/path/to/some.pkl")
    #print(tbl.head(15))
    ```
    """
    checkedPath: Optional[pathlib.Path] = fl.fileExists(pickleFilePath)
    if checkedPath is not None:
        return pandas.read_pickle(checkedPath)
    # the check did not return a usable path
    raise ValueError(
        f"Provided path to [{pickleFilePath}] seems to be wrong"
    )
Read Pandas table from provided pickle file (after checking that the file exists and that it is actually a file).
Example:
from phab.utils.files import pickle
pnd = pickle.openPickleAsPandasTable("/path/to/some.pkl")
#print(pnd.head(15))
def
savePandasTableAsPickle( pandasTable: pandas.core.frame.DataFrame, pickleFilePath: Union[str, pathlib.Path]) -> None:
def savePandasTableAsPickle(
    pandasTable: pandas.DataFrame,
    pickleFilePath: Union[str, pathlib.Path]
) -> None:
    """
    Save [Pandas](https://pandas.pydata.org) table to a pickle file.

    Never overwrites: raises `ValueError` if something already exists
    at the provided path.

    Example:

    ``` py
    from phab.utils.files import pickle

    pickle.savePandasTableAsPickle(tbl, "/path/to/some.pkl")
    ```
    """
    # pathlib.Path() accepts both strings and existing Path objects,
    # so no type-specific branching is needed
    filePath = pathlib.Path(pickleFilePath)
    if filePath.exists():
        raise ValueError(f"The [{filePath}] file already exists")
    pandasTable.to_pickle(filePath)
Save Pandas table to a pickle file.
Example:
from phab.utils.files import pickle
pickle.savePandasTableAsPickle(pnd, "/path/to/some.pkl")
def
mergePickles( picklesToMergePath: Union[str, pathlib.Path], resultingPicklePath: Union[NoneType, str, pathlib.Path]) -> Optional[pandas.core.frame.DataFrame]:
def mergePickles(
    picklesToMergePath: Union[str, pathlib.Path],
    resultingPicklePath: Union[None, str, pathlib.Path]
) -> Optional[pandas.DataFrame]:
    """
    Merge several pickle files into one. Looks for pickle files (*`*.pkl`*)
    in the provided folder and all of its subfolders (*in sorted path order,
    so the result is reproducible*), reads them
    to [Pandas](https://pandas.pydata.org) tables
    (*with `utils.files.pickle.openPickleAsPandasTable`*)
    and concatenates those into one final Pandas table
    (*using `utils.datasets.pandas.mergeTables`*).

    Saves resulting Pandas table to file (*if provided path is not `None`
    and not an empty string*) and returns `None`, or just returns the table.

    Raises `ValueError` if the folder check (*`directoryExists`*)
    returns `None` or if no `*.pkl` files are found.

    Example:

    ``` py
    from phab.utils.files import pickle

    pickle.mergePickles(
        "/path/to/pickles/to/merge/",
        "/path/to/where/to/save/result.pkl"
    )

    # or

    tbl = pickle.mergePickles(
        "/path/to/pickles/to/merge/",
        None
    )
    #print(tbl.head(15))
    ```
    """
    inputPath: Optional[pathlib.Path] = fl.directoryExists(picklesToMergePath)
    if inputPath is None:
        raise ValueError(
            f"Provided path to [{picklesToMergePath}] seems to be wrong"
        )

    # sorted, so that the order of merging (and so the order of rows)
    # does not depend on the order in which the filesystem lists files
    picklesToMerge = sorted(inputPath.glob("**/*.pkl"))

    filesCount = len(picklesToMerge)
    logger.debug(f"Found files: {filesCount}")
    if filesCount == 0:
        raise ValueError("There are no files in the provided folder")

    frames: List[pandas.DataFrame] = []
    for p in picklesToMerge:
        logger.info(f"Merging {p}...")
        tbl = openPickleAsPandasTable(p)
        logger.debug(f"Records in this pickle: {len(tbl)}")
        frames.append(tbl)

    mergedTable = pnd.mergeTables(frames)

    # truthiness check: both None and "" mean "do not save, return the table"
    if resultingPicklePath:
        savePandasTableAsPickle(mergedTable, resultingPicklePath)
        return None
    return mergedTable
Merge several pickle files into one. Looks for pickle files (*.pkl) in the provided folder, reads them to Pandas tables (with utils.files.pickle.openPickleAsPandasTable) and concatenates those into one final Pandas table (using utils.datasets.pandas.mergeTables).
Saves resulting Pandas table to file (if provided path is not None) or just returns it.
Example:
from phab.utils.files import pickle
pickle.mergePickles(
"/path/to/pickles/to/merge/",
"/path/to/where/to/save/result.pkl"
)
# or
tbl = pickle.mergePickles(
"/path/to/pickles/to/merge/",
None
)
#print(tbl.head(15))