utils.databases.lightcurves
Getting light curves data.
1""" 2Getting light curves data. 3""" 4 5import lightkurve 6from astropy.table import Table 7import pandas 8from pandera import pandas as pandera 9import numpy 10import pathlib 11import re 12from packaging.version import Version 13 14from typing import Optional, Dict, List, Pattern, Literal 15 16from ..files import file as fl 17from ..logs.log import logger 18 19# apparently, one cannot set long/short threshold, 20# hence this dictionary 21# 22# there are actually more authors available, 23# but we are only interested in these 24# 25authors: Dict[str, Dict] = { 26 "Kepler": 27 { 28 "mission": "Kepler", 29 "cadence": 30 { 31 "long": [1800], 32 "short": [60] 33 } 34 }, 35 "K2": 36 { 37 "mission": "K2", 38 "cadence": 39 { 40 "long": [1800], 41 "short": [60] 42 } 43 }, 44 "SPOC": 45 { 46 "mission": "TESS", 47 "cadence": 48 { 49 "long": [600], 50 "short": [120], 51 "fast": [20] 52 } 53 }, 54 "TESS-SPOC": 55 { 56 "mission": "TESS", 57 "cadence": 58 { 59 "long": [] # any cadence is long 60 } 61 } 62} 63""" 64Dictionary of authors, their cadence values and mapping to missions. 65""" 66 67missionSectorRegExes: Dict[str, Pattern] = { 68 "Kepler": re.compile( 69 r"^Kepler\s\w+\s(\d+)$" # Kepler Quarter 15 70 ), 71 "K2": re.compile( 72 r"^K2\s\w+\s(\d+)$" # K2 Campaign 12 73 ), 74 "TESS": re.compile( 75 r"^TESS\s\w+\s(\d+)$" # TESS Sector 40 76 ) 77} 78""" 79Dictionary of regular expressions for extracting sectors. 80""" 81 82lightCurveFluxTableSchema = pandera.DataFrameSchema( 83 { 84 "time": pandera.Column(numpy.float64), 85 "flux": pandera.Column(numpy.float32, nullable=True), 86 "fluxError": pandera.Column(numpy.float32, nullable=True) 87 }, 88 index=pandera.Index(int, unique=True), 89 strict=True, # only specified columns are allowed 90 coerce=False # do not cast other types to the specified one 91) 92""" 93Table schema for light curve fluxes. 
94""" 95 96 97def getLightCurveStats( 98 starName: str, 99 detailed: bool = True 100) -> Dict[str, Dict]: 101 """ 102 Gather statistics about available cadence values for a given star. 103 104 If `detailed` is set to `False`, then function will skip collecting 105 cadence values count by sectors, so resulting statistics will only 106 contain total count of values. 107 108 Example: 109 110 ``` py 111 from phab.utils.databases import lightcurves 112 113 stats = lightcurves.getLightCurveStats("Kepler-114") 114 if not stats: 115 print("Didn't find any results for this star") 116 else: 117 missionName = "Kepler" 118 cadenceType = "long" 119 sectors = stats.get( 120 missionName, 121 {} 122 ).get(cadenceType) 123 if sectors is None: 124 print( 125 " ".join(( 126 "There doesn't seem to be any sectors", 127 f"with [{cadenceType}] cadence by [{missionName}]" 128 )) 129 ) 130 else: 131 totalProperty = "total" 132 sectorsCount = sectors.get(totalProperty) 133 if sectorsCount is None: 134 print( 135 " ".join(( 136 f"For some reason, the [{totalProperty}] property", 137 f"is missing from the [{cadenceType}] cadence", 138 f"collection by [{missionName}]" 139 )) 140 ) 141 else: 142 print( 143 " ".join(( 144 f"Total amount of sectors with [{cadenceType}]", 145 f"cadence by [{missionName}]: {sectorsCount}", 146 )) 147 ) 148 bySectors = sectors.get("by-sectors") 149 if bySectors is None: 150 print( 151 " ".join(( 152 "For some reason, the [total] property is missing", 153 f"from the [{cadenceType}] cadence collection", 154 f"by [{missionName}]" 155 )) 156 ) 157 else: 158 for s in bySectors: 159 print(f"- {s}: {bySectors[s]}") 160 ``` 161 """ 162 stats: Dict[str, Dict] = {} 163 164 lghtcrvs = lightkurve.search_lightcurve( 165 starName, 166 author=tuple(authors.keys()) 167 ) 168 if len(lghtcrvs) != 0: 169 tbl: pandas.DataFrame = lghtcrvs.table.to_pandas()[ 170 ["author", "exptime", "mission"] 171 ] 172 logger.debug(tbl) 173 174 author: str # for mypy, but even then it is not happy with 
something else 175 for author, group in (tbl.groupby("author")): # type:ignore[assignment] # ya hz 176 if author not in authors: 177 raise ValueError(f"Unknown author: {author}") 178 mission = authors[author]["mission"] 179 if not stats.get(mission): 180 stats[mission] = {} 181 for cadence in ["long", "short", "fast"]: 182 if cadence in authors[author]["cadence"]: 183 stats[mission][cadence] = {} 184 cadenceValues: List[int] = ( 185 authors[author]["cadence"][cadence] 186 ) 187 cadences: pandas.DataFrame 188 if len(cadenceValues) > 0: # take only specified values 189 # perhaps both of these should be normalized to int 190 cadences = group.query("exptime == @cadenceValues") 191 else: # any value is good 192 cadences = group 193 194 # total count 195 stats[mission][cadence]["total"] = len(cadences) 196 197 if detailed: 198 # count by sectors 199 stats[mission][cadence]["by-sectors"] = {} 200 for m in cadences["mission"]: 201 # logger.debug(cadences.query("mission == @m")[ 202 # "exptime" 203 # ].values) 204 sectorMatch = re.search( 205 missionSectorRegExes[mission], 206 m 207 ) 208 if not sectorMatch: 209 raise ValueError( 210 " ".join(( 211 "Couldn't extract sector from", 212 f"this mission value: {m}" 213 )) 214 ) 215 sector = sectorMatch.group(1) 216 if not stats[mission][cadence]["by-sectors"].get( 217 sector 218 ): # this sector hasn't been added yet 219 stats[mission][cadence]["by-sectors"][ 220 sector 221 ] = {} 222 # save the cadence/exptime too (assuming 223 # that it is the same for every sector entry) 224 stats[mission][cadence]["by-sectors"][sector][ 225 "exptime" 226 ] = cadences.query("mission == @m")[ 227 "exptime" 228 ].values[0] # there must be a better way 229 try: 230 stats[mission][cadence][ 231 "by-sectors" 232 ][sector]["count"] += 1 233 except KeyError: 234 stats[mission][cadence][ 235 "by-sectors" 236 ][sector]["count"] = 1 237 return stats 238 239 240def getLightCurveIDs( 241 starName: str 242) -> Dict[str, List[str]]: 243 """ 244 Based on 
def getLightCurveIDs(
    starName: str
) -> Dict[str, List[str]]:
    """
    Based on available cadence values statistics for a given star,
    get names of missions and cadences. For instance, in order to pass
    them to `altaipony.lcio.from_mast()`.

    Raises `ValueError` if `getLightCurveStats()` finds nothing
    for this star.

    Example:

    ``` py
    from phab.utils.databases import lightcurves
    from altaipony.lcio import from_mast

    starName = "LTT 1445 A"
    lightCurveIDs = {}

    try:
        lightCurveIDs = lightcurves.getLightCurveIDs(starName)
    except ValueError as ex:
        print(f"Failed to get light curves missions and cadences. {ex}")
        raise
    if not lightCurveIDs:
        raise ValueError("Didn't find any results for this star")
    #print(lightCurveIDs)

    for m in lightCurveIDs.keys():
        #print(f"Mission: {m}")
        for c in lightCurveIDs[m]:
            #print(f"- {c}")
            flc = from_mast(
                starName,
                mode="LC",
                cadence=c,
                mission=m
            )
            #print(flc)
    ```
    """
    lightCurveIDs: Dict[str, List[str]] = {}

    stats: Dict[str, Dict] = getLightCurveStats(
        starName,
        detailed=False
    )
    if not stats:
        raise ValueError("Didn't find any results for this star")

    # the order matters, it goes from most important to least important,
    # and in fact long cadence is so not important that it is discarded
    # if there is fast or short cadence available
    cadencePriority = ["fast", "short", "long"]

    for m in stats.keys():
        lightCurveIDs[m] = []
        priorityThreshold = 0
        for cp in cadencePriority:
            # if there is already fast or short cadence in the list,
            # don't take long cadence (except for mission K2, because
            # its long cadence is what's most important even if
            # there are also fast and short ones)
            if lightCurveIDs[m] and priorityThreshold > 1 and m != "K2":
                break
            if cp in stats[m]:
                totalCnt = stats[m][cp].get("total")
                # the `total` property might be missing or 0 — in both
                # cases there is nothing to take from that cadence
                # (was `totalCnt and totalCnt != 0`, which is redundant:
                # truthiness already excludes both None and 0)
                if totalCnt:
                    lightCurveIDs[m].append(cp)
            priorityThreshold += 1

    return lightCurveIDs
def fitsToPandas(
    fitsFilePath: str,
    fitsType: Optional[Literal["tess", "kepler"]] = None,
    qualityBitmask: Literal["none", "default", "hard", "hardest"] = "default",
    dropNanTimes: bool = True,
    convertTimesToSeconds: bool = False
) -> pandas.DataFrame:
    """
    Open a generic light curves [FITS](https://en.wikipedia.org/wiki/FITS) file
    and create a Pandas table from it. Only the fluxes, their times
    and errors columns are taken.

    Handles the big/little endians problem when converting from FITS to Pandas.

    Raises `ValueError` if `fitsFilePath` does not point to an existing file.

    Example:

    ``` py
    from phab.utils.databases import lightcurves

    pnd = lightcurves.fitsToPandas(
        "./data/tess2019006130736-s0007-0000000266744225-0131-s_lc.fits",
        fitsType="tess",
        qualityBitmask="default",
        dropNanTimes=True,
        convertTimesToSeconds=True
    )

    #print(pnd)
    ```
    """
    fitsFile: Optional[pathlib.Path] = fl.fileExists(fitsFilePath)
    if fitsFile is None:
        raise ValueError(
            f"Provided path to [{fitsFilePath}] seems to be wrong"
        )
    lc = Table.read(fitsFile)

    # exclude values which do not satisfy the required quality
    if fitsType is not None:
        msk = None
        if fitsType == "tess":
            msk = lightkurve.utils.TessQualityFlags.create_quality_mask(
                quality_array=lc["QUALITY"],
                bitmask=qualityBitmask
            )
        elif fitsType == "kepler":
            msk = lightkurve.utils.KeplerQualityFlags.create_quality_mask(
                quality_array=lc["QUALITY"],
                bitmask=qualityBitmask
            )
        else:
            logger.warning(
                " ".join((
                    "Unknown FITS type, don't know",
                    "which quality mask to use"
                ))
            )
        # previously the mask was applied unconditionally, so an unknown
        # `fitsType` value would crash on `lc[None]` right after
        # the warning — only apply the mask when one was actually built
        if msk is not None:
            lc = lc[msk]

    narr = numpy.array(lc)
    # FITS stores data in big-endian, but pandas works with little-endian,
    # so the byte order needs to be swapped
    # https://stackoverflow.com/a/30284033/1688203
    if Version(numpy.__version__) > Version("1.26.4"):
        # if that doesn't work, then you might need to downgrade to 1.26.4
        narr = narr.view(narr.dtype.newbyteorder()).byteswap()
    else:
        narr = narr.byteswap().newbyteorder()

    # astropy.time does not(?) support NaN
    if dropNanTimes:
        nantimes = numpy.isnan(narr["TIME"].data)
        if numpy.any(nantimes):
            logger.debug(
                " ".join((
                    f"{numpy.sum(nantimes)} rows were excluded,",
                    "because their time values are NaN"
                ))
            )
            narr = narr[~nantimes]

    # apparently, one cannot just take columns from `lc`/`narr` directly,
    # hence this intermediate table
    pndraw = pandas.DataFrame(narr)
    logger.debug(f"Light curve table columns: {pndraw.columns}")

    flux = pandas.DataFrame(
        columns=[
            "time",
            "flux",
            "fluxError"
        ]
    )
    flux["time"] = pndraw["TIME"]
    flux["flux"] = pndraw["PDCSAP_FLUX"]
    flux["fluxError"] = pndraw["PDCSAP_FLUX_ERR"]

    if convertTimesToSeconds:
        flux["time"] = flux["time"] * 24 * 60 * 60

    if dropNanTimes:
        lightCurveFluxTableSchema.validate(flux)
    else:
        # when NaN times are kept, the `time` column must be nullable too
        lightCurveFluxTableSchema.update_column(
            "time",
            dtype=numpy.float64,
            nullable=True
        ).validate(flux)

    return flux
def lightCurveTessToPandas(
    lightKurve: lightkurve.lightcurve.TessLightCurve,
    convertTimesToSeconds: bool = False
) -> pandas.DataFrame:
    """
    Convert a TESS light curve object to a Pandas table.

    In general, this does almost the same thing as
    `utils.databases.lightcurves.fitsToPandas()`, but here it uses
    a TESS-specific reading function, and also there is no need to drop
    NaN times "manually" (*and fiddle with endians?*).

    Example:

    ``` py
    from phab.utils.databases import lightcurves
    import lightkurve

    downloadLC: bool = False
    lc = None
    if downloadLC:
        search_result = lightkurve.search_lightcurve(
            "Karmn J07446+035",
            author="SPOC",
            cadence="short"
        )
        lc = search_result[0].download(
            quality_bitmask="default"
        )
    else:
        lc = lightkurve.TessLightCurve.read(
            "./data/tess2019006130736-s0007-0000000266744225-0131-s_lc.fits",
            quality_bitmask="default"
        )

    pnd = lightcurves.lightCurveTessToPandas(lc, convertTimesToSeconds=True)

    #print(pnd)
    ```
    """
    src = lightKurve.to_pandas()
    logger.debug(f"Light curve table columns: {src.columns}")

    flux = pandas.DataFrame(
        columns=[
            "time",
            "flux",
            "fluxError"
        ]
    )

    # times come from the table index, fluxes from the PDCSAP columns
    flux["time"] = src.index
    flux["flux"] = src["pdcsap_flux"].values
    flux["fluxError"] = src["pdcsap_flux_err"].values

    if convertTimesToSeconds:
        # per the parameter's contract: day fractions -> seconds
        flux["time"] = flux["time"] * 24 * 60 * 60

    lightCurveFluxTableSchema.validate(flux)

    return flux
Dictionary of regular expressions for extracting sectors.
Table schema for light curve fluxes.
98def getLightCurveStats( 99 starName: str, 100 detailed: bool = True 101) -> Dict[str, Dict]: 102 """ 103 Gather statistics about available cadence values for a given star. 104 105 If `detailed` is set to `False`, then function will skip collecting 106 cadence values count by sectors, so resulting statistics will only 107 contain total count of values. 108 109 Example: 110 111 ``` py 112 from phab.utils.databases import lightcurves 113 114 stats = lightcurves.getLightCurveStats("Kepler-114") 115 if not stats: 116 print("Didn't find any results for this star") 117 else: 118 missionName = "Kepler" 119 cadenceType = "long" 120 sectors = stats.get( 121 missionName, 122 {} 123 ).get(cadenceType) 124 if sectors is None: 125 print( 126 " ".join(( 127 "There doesn't seem to be any sectors", 128 f"with [{cadenceType}] cadence by [{missionName}]" 129 )) 130 ) 131 else: 132 totalProperty = "total" 133 sectorsCount = sectors.get(totalProperty) 134 if sectorsCount is None: 135 print( 136 " ".join(( 137 f"For some reason, the [{totalProperty}] property", 138 f"is missing from the [{cadenceType}] cadence", 139 f"collection by [{missionName}]" 140 )) 141 ) 142 else: 143 print( 144 " ".join(( 145 f"Total amount of sectors with [{cadenceType}]", 146 f"cadence by [{missionName}]: {sectorsCount}", 147 )) 148 ) 149 bySectors = sectors.get("by-sectors") 150 if bySectors is None: 151 print( 152 " ".join(( 153 "For some reason, the [total] property is missing", 154 f"from the [{cadenceType}] cadence collection", 155 f"by [{missionName}]" 156 )) 157 ) 158 else: 159 for s in bySectors: 160 print(f"- {s}: {bySectors[s]}") 161 ``` 162 """ 163 stats: Dict[str, Dict] = {} 164 165 lghtcrvs = lightkurve.search_lightcurve( 166 starName, 167 author=tuple(authors.keys()) 168 ) 169 if len(lghtcrvs) != 0: 170 tbl: pandas.DataFrame = lghtcrvs.table.to_pandas()[ 171 ["author", "exptime", "mission"] 172 ] 173 logger.debug(tbl) 174 175 author: str # for mypy, but even then it is not happy with something 
else 176 for author, group in (tbl.groupby("author")): # type:ignore[assignment] # ya hz 177 if author not in authors: 178 raise ValueError(f"Unknown author: {author}") 179 mission = authors[author]["mission"] 180 if not stats.get(mission): 181 stats[mission] = {} 182 for cadence in ["long", "short", "fast"]: 183 if cadence in authors[author]["cadence"]: 184 stats[mission][cadence] = {} 185 cadenceValues: List[int] = ( 186 authors[author]["cadence"][cadence] 187 ) 188 cadences: pandas.DataFrame 189 if len(cadenceValues) > 0: # take only specified values 190 # perhaps both of these should be normalized to int 191 cadences = group.query("exptime == @cadenceValues") 192 else: # any value is good 193 cadences = group 194 195 # total count 196 stats[mission][cadence]["total"] = len(cadences) 197 198 if detailed: 199 # count by sectors 200 stats[mission][cadence]["by-sectors"] = {} 201 for m in cadences["mission"]: 202 # logger.debug(cadences.query("mission == @m")[ 203 # "exptime" 204 # ].values) 205 sectorMatch = re.search( 206 missionSectorRegExes[mission], 207 m 208 ) 209 if not sectorMatch: 210 raise ValueError( 211 " ".join(( 212 "Couldn't extract sector from", 213 f"this mission value: {m}" 214 )) 215 ) 216 sector = sectorMatch.group(1) 217 if not stats[mission][cadence]["by-sectors"].get( 218 sector 219 ): # this sector hasn't been added yet 220 stats[mission][cadence]["by-sectors"][ 221 sector 222 ] = {} 223 # save the cadence/exptime too (assuming 224 # that it is the same for every sector entry) 225 stats[mission][cadence]["by-sectors"][sector][ 226 "exptime" 227 ] = cadences.query("mission == @m")[ 228 "exptime" 229 ].values[0] # there must be a better way 230 try: 231 stats[mission][cadence][ 232 "by-sectors" 233 ][sector]["count"] += 1 234 except KeyError: 235 stats[mission][cadence][ 236 "by-sectors" 237 ][sector]["count"] = 1 238 return stats
Gather statistics about available cadence values for a given star.
If detailed
is set to False
, then function will skip collecting
cadence values count by sectors, so resulting statistics will only
contain total count of values.
Example:
from phab.utils.databases import lightcurves
stats = lightcurves.getLightCurveStats("Kepler-114")
if not stats:
print("Didn't find any results for this star")
else:
missionName = "Kepler"
cadenceType = "long"
sectors = stats.get(
missionName,
{}
).get(cadenceType)
if sectors is None:
print(
" ".join((
"There doesn't seem to be any sectors",
f"with [{cadenceType}] cadence by [{missionName}]"
))
)
else:
totalProperty = "total"
sectorsCount = sectors.get(totalProperty)
if sectorsCount is None:
print(
" ".join((
f"For some reason, the [{totalProperty}] property",
f"is missing from the [{cadenceType}] cadence",
f"collection by [{missionName}]"
))
)
else:
print(
" ".join((
f"Total amount of sectors with [{cadenceType}]",
f"cadence by [{missionName}]: {sectorsCount}",
))
)
bySectors = sectors.get("by-sectors")
if bySectors is None:
print(
" ".join((
"For some reason, the [total] property is missing",
f"from the [{cadenceType}] cadence collection",
f"by [{missionName}]"
))
)
else:
for s in bySectors:
print(f"- {s}: {bySectors[s]}")
241def getLightCurveIDs( 242 starName: str 243) -> Dict[str, List[str]]: 244 """ 245 Based on available cadence values statistics for a given star, 246 get names of missions and cadences. For instance, in order to pass 247 them to `altaipony.lcio.from_mast()`. 248 249 Example: 250 251 ``` py 252 from phab.utils.databases import lightcurves 253 from altaipony.lcio import from_mast 254 255 starName = "LTT 1445 A" 256 lightCurveIDs = {} 257 258 try: 259 lightCurveIDs = lightcurves.getLightCurveIDs(starName) 260 except ValueError as ex: 261 print(f"Failed to get light curves missons and cadences. {ex}") 262 raise 263 if not lightCurveIDs: 264 raise ValueError("Didn't find any results for this star") 265 #print(lightCurveIDs) 266 267 for m in lightCurveIDs.keys(): 268 #print(f"Mission: {m}") 269 for c in lightCurveIDs[m]: 270 #print(f"- {c}") 271 flc = from_mast( 272 starName, 273 mode="LC", 274 cadence=c, 275 mission=m 276 ) 277 #print(flc) 278 ``` 279 """ 280 lightCurveIDs: Dict[str, List[str]] = {} 281 282 stats: Dict[str, Dict] = getLightCurveStats( 283 starName, 284 detailed=False 285 ) 286 if not stats: 287 raise ValueError("Didn't find any results for this star") 288 289 # the order matters, it goes from most important to least important, 290 # and in fact long cadence is so not important that it is discarded 291 # if there is fast or short cadence available 292 cadencePriority = ["fast", "short", "long"] 293 294 for m in stats.keys(): 295 lightCurveIDs[m] = [] 296 priorityThreshold = 0 297 for cp in cadencePriority: 298 # if there is already fast or short cadence in the list, 299 # don't take long cadence (except for mission K2, because 300 # its long cadence is what's most important even if 301 # there are also fast and short ones) 302 if any(lightCurveIDs[m]) and priorityThreshold > 1 and m != "K2": 303 break 304 if cp in stats[m]: 305 # print(f"Count [{cp}]: {stats[m][cp]['total']}") 306 totalCnt = stats[m][cp].get("total") 307 if totalCnt and totalCnt != 0: 
308 lightCurveIDs[m].append(cp) 309 # else: 310 # print( 311 # " ".join(( 312 # f"[WARNING] The [{cp}] cadence count", 313 # f"in [{m}] is 0 (or missing)" 314 # )) 315 # ) 316 priorityThreshold += 1 317 318 return lightCurveIDs
Based on available cadence values statistics for a given star,
get names of missions and cadences. For instance, in order to pass
them to altaipony.lcio.from_mast()
.
Example:
from phab.utils.databases import lightcurves
from altaipony.lcio import from_mast
starName = "LTT 1445 A"
lightCurveIDs = {}
try:
lightCurveIDs = lightcurves.getLightCurveIDs(starName)
except ValueError as ex:
print(f"Failed to get light curves missons and cadences. {ex}")
raise
if not lightCurveIDs:
raise ValueError("Didn't find any results for this star")
#print(lightCurveIDs)
for m in lightCurveIDs.keys():
#print(f"Mission: {m}")
for c in lightCurveIDs[m]:
#print(f"- {c}")
flc = from_mast(
starName,
mode="LC",
cadence=c,
mission=m
)
#print(flc)
321def fitsToPandas( 322 fitsFilePath: str, 323 fitsType: Optional[Literal["tess", "kepler"]] = None, 324 qualityBitmask: Literal["none", "default", "hard", "hardest"] = "default", 325 dropNanTimes: bool = True, 326 convertTimesToSeconds: bool = False 327) -> pandas.DataFrame: 328 """ 329 Open a generic light curves [FITS](https://en.wikipedia.org/wiki/FITS) file 330 and create a Pandas table from it. Only the fluxes, their times 331 and errors columns are taken. 332 333 Handles the big/little endians problem when converting from FITS to Pandas. 334 335 Example: 336 337 ``` py 338 from phab.utils.databases import lightcurves 339 340 pnd = lightcurves.fitsToPandas( 341 "./data/tess2019006130736-s0007-0000000266744225-0131-s_lc.fits", 342 fitsType="tess", 343 qualityBitmask="default", 344 dropNanTimes=True, 345 convertTimesToSeconds=True 346 ) 347 348 #print(pnd) 349 ``` 350 """ 351 lc = None 352 fitsFile: Optional[pathlib.Path] = fl.fileExists(fitsFilePath) 353 if fitsFile is None: 354 raise ValueError( 355 f"Provided path to [{fitsFilePath}] seems to be wrong" 356 ) 357 else: 358 lc = Table.read(fitsFile) 359 360 # exclude values which do not satisfy the required quality 361 if fitsType is not None: 362 msk = None 363 if fitsType == "tess": 364 msk = lightkurve.utils.TessQualityFlags.create_quality_mask( 365 quality_array=lc["QUALITY"], 366 bitmask=qualityBitmask 367 ) 368 elif fitsType == "kepler": 369 msk = lightkurve.utils.KeplerQualityFlags.create_quality_mask( 370 quality_array=lc["QUALITY"], 371 bitmask=qualityBitmask 372 ) 373 else: 374 print( 375 " ".join(( 376 "[WARNING] Unknown FITS type, don't know", 377 "which quality mask to use" 378 )) 379 ) 380 lc = lc[msk] 381 382 narr = numpy.array(lc) 383 # FITS stores data in big-endian, but pandas works with little-endian, 384 # so the byte order needs to be swapped 385 # https://stackoverflow.com/a/30284033/1688203 386 if Version(numpy.__version__) > Version("1.26.4"): 387 # if that doesn't work, then you might 
need to downgrade to 1.26.4 388 narr = narr.view(narr.dtype.newbyteorder()).byteswap() 389 else: 390 narr = narr.byteswap().newbyteorder() 391 392 # astropy.time does not(?) support NaN 393 if dropNanTimes: 394 nantimes = numpy.isnan(narr["TIME"].data) 395 if numpy.any(nantimes): 396 print( 397 " ".join(( 398 f"[DEBUG] {numpy.sum(nantimes)} rows were excluded,", 399 "because their time values are NaN" 400 )) 401 ) 402 narr = narr[~nantimes] 403 404 # apparently, one cannot just take columns from `lc`/`narr` directly, 405 # hence this intermediate table 406 pndraw = pandas.DataFrame(narr) 407 logger.debug(f"Light curve table columns: {pndraw.columns}") 408 409 flux = pandas.DataFrame( 410 columns=[ 411 "time", 412 "flux", 413 "fluxError" 414 ] 415 ) 416 flux["time"] = pndraw["TIME"] 417 flux["flux"] = pndraw["PDCSAP_FLUX"] 418 flux["fluxError"] = pndraw["PDCSAP_FLUX_ERR"] 419 420 # in case excluding NaN times right after `Table.read()` is less efficient 421 # if dropNanTimes: 422 # flux = flux.dropna(subset=["time"]) 423 424 if convertTimesToSeconds: 425 flux["time"] = flux["time"] * 24 * 60 * 60 426 427 if dropNanTimes: 428 lightCurveFluxTableSchema.validate(flux) 429 else: 430 lightCurveFluxTableSchema.update_column( 431 "time", 432 dtype=numpy.float64, 433 nullable=True 434 ).validate(flux) 435 436 return flux
Open a generic light curves FITS file and create a Pandas table from it. Only the fluxes, their times and errors columns are taken.
Handles the big/little endians problem when converting from FITS to Pandas.
Example:
from phab.utils.databases import lightcurves
pnd = lightcurves.fitsToPandas(
"./data/tess2019006130736-s0007-0000000266744225-0131-s_lc.fits",
fitsType="tess",
qualityBitmask="default",
dropNanTimes=True,
convertTimesToSeconds=True
)
#print(pnd)
439def lightCurveTessToPandas( 440 lightKurve: lightkurve.lightcurve.TessLightCurve, 441 convertTimesToSeconds: bool = False 442) -> pandas.DataFrame: 443 """ 444 Converting a TESS light curve object to a Pandas table. In general, 445 it does almost the same thing as 446 `utils.databases.lightcurves.fitsToPandas()`, 447 but here there it uses a TESS-specific reading function, and also 448 there is no need to drop NaN times "manually" (*and fiddle with endians?*). 449 450 Example: 451 452 ``` py 453 from phab.utils.databases import lightcurves 454 import lightkurve 455 456 downloadLC: bool = False 457 lc = None 458 if downloadLC: 459 search_result = lightkurve.search_lightcurve( 460 "Karmn J07446+035", 461 author="SPOC", 462 cadence="short" 463 ) 464 lc = search_result[0].download( 465 quality_bitmask="default" 466 ) 467 else: 468 lc = lightkurve.TessLightCurve.read( 469 "./data/tess2019006130736-s0007-0000000266744225-0131-s_lc.fits", 470 quality_bitmask="default" 471 ) 472 473 pnd = lightcurves.lightCurveTessToPandas(lc, convertTimesToSeconds=True) 474 475 #print(pnd) 476 ``` 477 """ 478 pndraw = lightKurve.to_pandas() 479 logger.debug(f"Light curve table columns: {pndraw.columns}") 480 481 flux = pandas.DataFrame( 482 columns=[ 483 "time", 484 "flux", 485 "fluxError" 486 ] 487 ) 488 489 flux["time"] = pndraw.index 490 flux["flux"] = pndraw["pdcsap_flux"].values 491 flux["fluxError"] = pndraw["pdcsap_flux_err"].values 492 493 if convertTimesToSeconds: 494 flux["time"] = flux["time"] * 24 * 60 * 60 495 496 lightCurveFluxTableSchema.validate(flux) 497 498 return flux
Converting a TESS light curve object to a Pandas table. In general,
it does almost the same thing as
utils.databases.lightcurves.fitsToPandas()
,
but here it uses a TESS-specific reading function, and also
there is no need to drop NaN times "manually" (and fiddle with endians?).
Example:
from phab.utils.databases import lightcurves
import lightkurve
downloadLC: bool = False
lc = None
if downloadLC:
search_result = lightkurve.search_lightcurve(
"Karmn J07446+035",
author="SPOC",
cadence="short"
)
lc = search_result[0].download(
quality_bitmask="default"
)
else:
lc = lightkurve.TessLightCurve.read(
"./data/tess2019006130736-s0007-0000000266744225-0131-s_lc.fits",
quality_bitmask="default"
)
pnd = lightcurves.lightCurveTessToPandas(lc, convertTimesToSeconds=True)
#print(pnd)