utils.databases.lightcurves
Getting light curves data.
"""
Getting light curves data.
"""

import lightkurve
import pandas
import re

from typing import Optional, Dict, List, Pattern

from ..logs.log import logger

# apparently, one cannot set long/short threshold,
# hence this dictionary
#
# there are actually more authors available,
# but we are only interested in these
#
authors: Dict[str, Dict] = {
    "Kepler":
    {
        "mission": "Kepler",
        "cadence":
        {
            "long": [1800],
            "short": [60]
        }
    },
    "K2":
    {
        "mission": "K2",
        "cadence":
        {
            "long": [1800],
            "short": [60]
        }
    },
    "SPOC":
    {
        "mission": "TESS",
        "cadence":
        {
            "long": [600],
            "short": [120],
            "fast": [20]
        }
    },
    "TESS-SPOC":
    {
        "mission": "TESS",
        "cadence":
        {
            "long": []  # any cadence is long
        }
    }
}
"""
Dictionary of authors, their cadence values and mapping to missions.
"""

missionSectorRegExes: Dict[str, Pattern] = {
    "Kepler": re.compile(
        r"^Kepler\s\w+\s(\d+)$"  # Kepler Quarter 15
    ),
    "K2": re.compile(
        r"^K2\s\w+\s(\d+)$"  # K2 Campaign 12
    ),
    "TESS": re.compile(
        r"^TESS\s\w+\s(\d+)$"  # TESS Sector 40
    )
}
"""
Dictionary of regular expressions for extracting sectors.
"""


def getLightCurveStats(
    starName: str,
    detailed: bool = True
) -> Dict[str, Dict]:
    """
    Gather statistics about available cadence values for a given star.

    The search is done with `lightkurve.search_lightcurve()`, restricted
    to the authors listed in `authors`. The result is a dictionary keyed
    by mission name, then by cadence type (`long`/`short`/`fast`). Every
    cadence entry has a `total` count and, if `detailed` is `True`,
    a `by-sectors` dictionary that maps a sector number (as a string)
    to its `exptime` and `count`. If the search finds nothing,
    an empty dictionary is returned.

    If `detailed` is set to `False`, then function will skip collecting
    cadence values count by sectors, so resulting statistics will only
    contain total count of values.

    Several authors can map to the same mission (for example, `SPOC`
    and `TESS-SPOC` are both `TESS`), in which case their counts
    are summed up within that mission.

    Raises `ValueError` if the search returns an author that is not
    in `authors`, or if a sector number cannot be extracted from
    a mission value.

    Example:

    ``` py
    from phab.utils.databases import lightcurves

    stats = lightcurves.getLightCurveStats("Kepler-114")
    if not stats:
        print("Didn't find any results for this star")
    else:
        missionName = "Kepler"
        cadenceType = "long"
        sectors = stats.get(
            missionName,
            {}
        ).get(cadenceType)
        if sectors is None:
            print(
                " ".join((
                    "There doesn't seem to be any sectors",
                    f"with [{cadenceType}] cadence by [{missionName}]"
                ))
            )
        else:
            totalProperty = "total"
            sectorsCount = sectors.get(totalProperty)
            if sectorsCount is None:
                print(
                    " ".join((
                        f"For some reason, the [{totalProperty}] property",
                        f"is missing from the [{cadenceType}] cadence",
                        f"collection by [{missionName}]"
                    ))
                )
            else:
                print(
                    " ".join((
                        f"Total amount of sectors with [{cadenceType}]",
                        f"cadence by [{missionName}]: {sectorsCount}",
                    ))
                )
                bySectors = sectors.get("by-sectors")
                if bySectors is None:
                    print(
                        " ".join((
                            "For some reason, the [by-sectors] property",
                            f"is missing from the [{cadenceType}] cadence",
                            f"collection by [{missionName}]"
                        ))
                    )
                else:
                    for s in bySectors:
                        print(f"- {s}: {bySectors[s]}")
    ```
    """
    stats: Dict[str, Dict] = {}

    lghtcrvs = lightkurve.search_lightcurve(
        starName,
        author=tuple(authors.keys())
    )
    if len(lghtcrvs) == 0:
        return stats

    tbl: pandas.DataFrame = lghtcrvs.table.to_pandas()[
        ["author", "exptime", "mission"]
    ]
    logger.debug(tbl)

    author: str  # for mypy, which still complains about the loop target
    for author, group in tbl.groupby("author"):  # type:ignore[assignment]
        if author not in authors:
            raise ValueError(f"Unknown author: {author}")
        mission = authors[author]["mission"]
        sectorRegEx = missionSectorRegExes[mission]
        # several authors can share a mission, so the mission entry
        # is reused rather than recreated
        missionStats = stats.setdefault(mission, {})
        for cadence in ["long", "short", "fast"]:
            if cadence not in authors[author]["cadence"]:
                continue
            # this has to stay a local variable, because the query
            # expression below refers to it via `@cadenceValues`
            cadenceValues: List[int] = (
                authors[author]["cadence"][cadence]
            )
            cadences: pandas.DataFrame
            if len(cadenceValues) > 0:  # take only specified values
                # perhaps both of these should be normalized to int
                cadences = group.query("exptime == @cadenceValues")
            else:  # any value is good
                cadences = group

            # total count, accumulated across authors of the same mission
            cadenceStats = missionStats.setdefault(cadence, {"total": 0})
            cadenceStats["total"] += len(cadences)

            if not detailed:
                continue

            # count by sectors
            bySectors = cadenceStats.setdefault("by-sectors", {})
            for m, exptime in zip(
                cadences["mission"],
                cadences["exptime"].values
            ):
                sectorMatch = sectorRegEx.search(m)
                if not sectorMatch:
                    raise ValueError(
                        " ".join((
                            "Couldn't extract sector from",
                            f"this mission value: {m}"
                        ))
                    )
                sector = sectorMatch.group(1)
                sectorStats = bySectors.get(sector)
                if sectorStats is None:
                    # this sector hasn't been added yet; save
                    # the cadence/exptime of its first entry too
                    # (assuming that it is the same for every
                    # sector entry)
                    bySectors[sector] = {"exptime": exptime, "count": 1}
                else:
                    sectorStats["count"] += 1
    return stats


def getLightCurveIDs(
    starName: str
) -> Dict[str, List[str]]:
    """
    Based on available cadence values statistics for a given star,
    get names of missions and cadences. For instance, in order to pass
    them to `altaipony.lcio.from_mast()`.

    The result maps every mission found by `getLightCurveStats()`
    to a list of cadence types that have a non-zero total count,
    ordered as `fast`, `short`, `long`. The `long` cadence is only
    considered if neither `fast` nor `short` made it to the list,
    except for the `K2` mission, where `long` is always considered.
    The list can be empty.

    Raises `ValueError` if there are no statistics for the star at all.

    Example:

    ``` py
    from phab.utils.databases import lightcurves
    from altaipony.lcio import from_mast

    starName = "LTT 1445 A"
    lightCurveIDs = {}

    try:
        lightCurveIDs = lightcurves.getLightCurveIDs(starName)
    except ValueError as ex:
        print(f"Failed to get light curves missions and cadences. {ex}")
        raise
    if not lightCurveIDs:
        raise ValueError("Didn't find any results for this star")
    #print(lightCurveIDs)

    for m in lightCurveIDs.keys():
        #print(f"Mission: {m}")
        for c in lightCurveIDs[m]:
            #print(f"- {c}")
            flc = from_mast(
                starName,
                mode="LC",
                cadence=c,
                mission=m
            )
            #print(flc)
    ```
    """
    lightCurveIDs: Dict[str, List[str]] = {}

    stats: Dict[str, Dict] = getLightCurveStats(
        starName,
        detailed=False
    )
    if not stats:
        raise ValueError("Didn't find any results for this star")

    # the order matters, it goes from most important to least important,
    # and in fact long cadence is so not important that it is discarded
    # if there is fast or short cadence available
    cadencePriority = ["fast", "short", "long"]

    for m, missionStats in stats.items():
        selected: List[str] = []
        for priority, cp in enumerate(cadencePriority):
            # if there is already fast or short cadence in the list,
            # don't take long cadence (except for mission K2, because
            # its long cadence is what's most important even if
            # there are also fast and short ones)
            if selected and priority > 1 and m != "K2":
                break
            if cp not in missionStats:
                continue
            # a missing or zero total means that there is nothing
            # to get for this cadence
            if missionStats[cp].get("total"):
                selected.append(cp)
        lightCurveIDs[m] = selected

    return lightCurveIDs
missionSectorRegExes: Dict[str, Pattern] =
{'Kepler': re.compile('^Kepler\\s\\w+\\s(\\d+)$'), 'K2': re.compile('^K2\\s\\w+\\s(\\d+)$'), 'TESS': re.compile('^TESS\\s\\w+\\s(\\d+)$')}
Dictionary of regular expressions for extracting sectors.
def
getLightCurveStats(starName: str, detailed: bool = True) -> Dict[str, Dict]:
def getLightCurveStats(
    starName: str,
    detailed: bool = True
) -> Dict[str, Dict]:
    """
    Gather statistics about available cadence values for a given star.

    The search is done with `lightkurve.search_lightcurve()`, restricted
    to the authors listed in `authors`. The result is a dictionary keyed
    by mission name, then by cadence type (`long`/`short`/`fast`). Every
    cadence entry has a `total` count and, if `detailed` is `True`,
    a `by-sectors` dictionary that maps a sector number (as a string)
    to its `exptime` and `count`. If the search finds nothing,
    an empty dictionary is returned.

    If `detailed` is set to `False`, then function will skip collecting
    cadence values count by sectors, so resulting statistics will only
    contain total count of values.

    Several authors can map to the same mission (for example, `SPOC`
    and `TESS-SPOC` are both `TESS`), in which case their counts
    are summed up within that mission.

    Raises `ValueError` if the search returns an author that is not
    in `authors`, or if a sector number cannot be extracted from
    a mission value.

    Example:

    ``` py
    from phab.utils.databases import lightcurves

    stats = lightcurves.getLightCurveStats("Kepler-114")
    if not stats:
        print("Didn't find any results for this star")
    else:
        missionName = "Kepler"
        cadenceType = "long"
        sectors = stats.get(
            missionName,
            {}
        ).get(cadenceType)
        if sectors is None:
            print(
                " ".join((
                    "There doesn't seem to be any sectors",
                    f"with [{cadenceType}] cadence by [{missionName}]"
                ))
            )
        else:
            totalProperty = "total"
            sectorsCount = sectors.get(totalProperty)
            if sectorsCount is None:
                print(
                    " ".join((
                        f"For some reason, the [{totalProperty}] property",
                        f"is missing from the [{cadenceType}] cadence",
                        f"collection by [{missionName}]"
                    ))
                )
            else:
                print(
                    " ".join((
                        f"Total amount of sectors with [{cadenceType}]",
                        f"cadence by [{missionName}]: {sectorsCount}",
                    ))
                )
                bySectors = sectors.get("by-sectors")
                if bySectors is None:
                    print(
                        " ".join((
                            "For some reason, the [by-sectors] property",
                            f"is missing from the [{cadenceType}] cadence",
                            f"collection by [{missionName}]"
                        ))
                    )
                else:
                    for s in bySectors:
                        print(f"- {s}: {bySectors[s]}")
    ```
    """
    stats: Dict[str, Dict] = {}

    lghtcrvs = lightkurve.search_lightcurve(
        starName,
        author=tuple(authors.keys())
    )
    if len(lghtcrvs) == 0:
        return stats

    tbl: pandas.DataFrame = lghtcrvs.table.to_pandas()[
        ["author", "exptime", "mission"]
    ]
    logger.debug(tbl)

    author: str  # for mypy, which still complains about the loop target
    for author, group in tbl.groupby("author"):  # type:ignore[assignment]
        if author not in authors:
            raise ValueError(f"Unknown author: {author}")
        mission = authors[author]["mission"]
        sectorRegEx = missionSectorRegExes[mission]
        # several authors can share a mission, so the mission entry
        # is reused rather than recreated
        missionStats = stats.setdefault(mission, {})
        for cadence in ["long", "short", "fast"]:
            if cadence not in authors[author]["cadence"]:
                continue
            # this has to stay a local variable, because the query
            # expression below refers to it via `@cadenceValues`
            cadenceValues: List[int] = (
                authors[author]["cadence"][cadence]
            )
            cadences: pandas.DataFrame
            if len(cadenceValues) > 0:  # take only specified values
                # perhaps both of these should be normalized to int
                cadences = group.query("exptime == @cadenceValues")
            else:  # any value is good
                cadences = group

            # total count, accumulated across authors of the same mission
            cadenceStats = missionStats.setdefault(cadence, {"total": 0})
            cadenceStats["total"] += len(cadences)

            if not detailed:
                continue

            # count by sectors
            bySectors = cadenceStats.setdefault("by-sectors", {})
            for m, exptime in zip(
                cadences["mission"],
                cadences["exptime"].values
            ):
                sectorMatch = sectorRegEx.search(m)
                if not sectorMatch:
                    raise ValueError(
                        " ".join((
                            "Couldn't extract sector from",
                            f"this mission value: {m}"
                        ))
                    )
                sector = sectorMatch.group(1)
                sectorStats = bySectors.get(sector)
                if sectorStats is None:
                    # this sector hasn't been added yet; save
                    # the cadence/exptime of its first entry too
                    # (assuming that it is the same for every
                    # sector entry)
                    bySectors[sector] = {"exptime": exptime, "count": 1}
                else:
                    sectorStats["count"] += 1
    return stats
Gather statistics about available cadence values for a given star.
If `detailed` is set to `False`, the function skips collecting cadence value counts by sector, so the resulting statistics contain only the total counts.
Example:
from phab.utils.databases import lightcurves
stats = lightcurves.getLightCurveStats("Kepler-114")
if not stats:
print("Didn't find any results for this star")
else:
missionName = "Kepler"
cadenceType = "long"
sectors = stats.get(
missionName,
{}
).get(cadenceType)
if sectors is None:
print(
" ".join((
"There doesn't seem to be any sectors",
f"with [{cadenceType}] cadence by [{missionName}]"
))
)
else:
totalProperty = "total"
sectorsCount = sectors.get(totalProperty)
if sectorsCount is None:
print(
" ".join((
f"For some reason, the [{totalProperty}] property",
f"is missing from the [{cadenceType}] cadence",
f"collection by [{missionName}]"
))
)
else:
print(
" ".join((
f"Total amount of sectors with [{cadenceType}]",
f"cadence by [{missionName}]: {sectorsCount}",
))
)
bySectors = sectors.get("by-sectors")
if bySectors is None:
print(
" ".join((
"For some reason, the [by-sectors] property is missing",
f"from the [{cadenceType}] cadence collection",
f"by [{missionName}]"
))
)
else:
for s in bySectors:
print(f"- {s}: {bySectors[s]}")
def
getLightCurveIDs(starName: str) -> Dict[str, List[str]]:
def getLightCurveIDs(
    starName: str
) -> Dict[str, List[str]]:
    """
    Based on available cadence values statistics for a given star,
    get names of missions and cadences. For instance, in order to pass
    them to `altaipony.lcio.from_mast()`.

    The result maps every mission found by `getLightCurveStats()`
    to a list of cadence types that have a non-zero total count,
    ordered as `fast`, `short`, `long`. The `long` cadence is only
    considered if neither `fast` nor `short` made it to the list,
    except for the `K2` mission, where `long` is always considered.
    The list can be empty.

    Raises `ValueError` if there are no statistics for the star at all.

    Example:

    ``` py
    from phab.utils.databases import lightcurves
    from altaipony.lcio import from_mast

    starName = "LTT 1445 A"
    lightCurveIDs = {}

    try:
        lightCurveIDs = lightcurves.getLightCurveIDs(starName)
    except ValueError as ex:
        print(f"Failed to get light curves missions and cadences. {ex}")
        raise
    if not lightCurveIDs:
        raise ValueError("Didn't find any results for this star")
    #print(lightCurveIDs)

    for m in lightCurveIDs.keys():
        #print(f"Mission: {m}")
        for c in lightCurveIDs[m]:
            #print(f"- {c}")
            flc = from_mast(
                starName,
                mode="LC",
                cadence=c,
                mission=m
            )
            #print(flc)
    ```
    """
    lightCurveIDs: Dict[str, List[str]] = {}

    stats: Dict[str, Dict] = getLightCurveStats(
        starName,
        detailed=False
    )
    if not stats:
        raise ValueError("Didn't find any results for this star")

    # the order matters, it goes from most important to least important,
    # and in fact long cadence is so not important that it is discarded
    # if there is fast or short cadence available
    cadencePriority = ["fast", "short", "long"]

    for m, missionStats in stats.items():
        selected: List[str] = []
        for priority, cp in enumerate(cadencePriority):
            # if there is already fast or short cadence in the list,
            # don't take long cadence (except for mission K2, because
            # its long cadence is what's most important even if
            # there are also fast and short ones)
            if selected and priority > 1 and m != "K2":
                break
            if cp not in missionStats:
                continue
            # a missing or zero total means that there is nothing
            # to get for this cadence
            if missionStats[cp].get("total"):
                selected.append(cp)
        lightCurveIDs[m] = selected

    return lightCurveIDs
Based on available cadence values statistics for a given star, get the names of missions and cadences, for instance in order to pass them to `altaipony.lcio.from_mast()`.
Example:
from phab.utils.databases import lightcurves
from altaipony.lcio import from_mast
starName = "LTT 1445 A"
lightCurveIDs = {}
try:
lightCurveIDs = lightcurves.getLightCurveIDs(starName)
except ValueError as ex:
print(f"Failed to get light curves missions and cadences. {ex}")
raise
if not lightCurveIDs:
raise ValueError("Didn't find any results for this star")
#print(lightCurveIDs)
for m in lightCurveIDs.keys():
#print(f"Mission: {m}")
for c in lightCurveIDs[m]:
#print(f"- {c}")
flc = from_mast(
starName,
mode="LC",
cadence=c,
mission=m
)
#print(flc)