uio.utility.databases.lightcurves
Getting light curves data.
"""
Getting light curves data.
"""

import lightkurve
import pandas
import re

from typing import Optional, Dict, List, Pattern

from ..logs.log import logger

# apparently, one cannot set long/short threshold,
# hence this dictionary
#
# there are actually more authors available,
# but we are only interested in these
#
authors: Dict[str, Dict] = {
    "Kepler":
    {
        "mission": "Kepler",
        "cadence":
        {
            "long": [1800],
            "short": [60]
        }
    },
    "K2":
    {
        "mission": "K2",
        "cadence":
        {
            "long": [1800],
            "short": [60]
        }
    },
    "SPOC":
    {
        "mission": "TESS",
        "cadence":
        {
            "long": [600],
            "short": [120],
            "fast": [20]
        }
    },
    "TESS-SPOC":
    {
        "mission": "TESS",
        "cadence":
        {
            "long": []  # any cadence is long
        }
    }
}
"""
Dictionary of authors, their cadence values and mapping to missions.
"""

missionSectorRegExes: Dict[str, Pattern] = {
    "Kepler": re.compile(
        r"^Kepler\s\w+\s(\d+)$"  # Kepler Quarter 15
    ),
    "K2": re.compile(
        r"^K2\s\w+\s(\d+)$"  # K2 Campaign 12
    ),
    "TESS": re.compile(
        r"^TESS\s\w+\s(\d+)$"  # TESS Sector 40
    )
}
"""
Dictionary of regular expressions for extracting sectors.
"""


def getLightCurveStats(
    starName: str,
    detailed: bool = True
) -> Dict[str, Dict]:
    """
    Gather statistics about available cadence values for a given star.

    Light curves are searched with `lightkurve.search_lightcurve()`
    for all the authors listed in `authors`, and then the found entries
    are counted per mission and per cadence type (`long`, `short`, `fast`).
    Counts from different authors that map to the same mission
    (for example, `SPOC` and `TESS-SPOC` are both `TESS`) are summed up.

    If `detailed` is set to `False`, then the function will skip collecting
    cadence values count by sectors, so resulting statistics will only
    contain total count of values.

    Returns an empty dictionary if the search found nothing. Otherwise
    the result has the following structure (every cadence type defined
    for a found author is present, even if its `total` is 0):

    ``` py
    {
        "TESS": {
            "short": {
                "total": 2,
                "by-sectors": {  # only if `detailed` is `True`
                    "4": {"exptime": 120, "count": 1},
                    "31": {"exptime": 120, "count": 1}
                }
            }
        }
    }
    ```

    Raises `ValueError` if search results contain an author that is not
    listed in `authors`, or if a sector cannot be extracted from
    the mission value of an entry.

    Example:

    ``` py
    from uio.utility.databases import lightcurves

    stats = lightcurves.getLightCurveStats("Kepler-114")
    if not stats:
        print("Didn't find any results for this star")
    else:
        missionName = "Kepler"
        cadenceType = "long"
        sectors = stats.get(missionName, {}).get(cadenceType)
        if sectors is None:
            print(
                f"No [{cadenceType}] cadence sectors by [{missionName}]"
            )
        else:
            print(f"Total amount of sectors: {sectors['total']}")
            for s, info in sectors.get("by-sectors", {}).items():
                print(f"- {s}: {info}")
    ```
    """
    stats: Dict[str, Dict] = {}

    lghtcrvs = lightkurve.search_lightcurve(
        starName,
        author=tuple(authors.keys())
    )
    if len(lghtcrvs) == 0:
        return stats

    tbl: pandas.DataFrame = lghtcrvs.table.to_pandas()[
        ["author", "exptime", "mission"]
    ]
    logger.debug(tbl)

    for author, group in tbl.groupby("author"):
        if author not in authors:
            raise ValueError(f"Unknown author: {author}")
        mission = authors[author]["mission"]
        missionStats = stats.setdefault(mission, {})
        for cadence in ["long", "short", "fast"]:
            if cadence not in authors[author]["cadence"]:
                continue
            cadenceValues: List[int] = (
                authors[author]["cadence"][cadence]
            )
            if len(cadenceValues) > 0:  # take only specified values
                # perhaps both of these should be normalized to int
                cadences = group.query("exptime == @cadenceValues")
            else:  # any value is good
                cadences = group

            # several authors can map to the same mission, so
            # the counters are accumulated instead of being reset
            cadenceStats = missionStats.setdefault(cadence, {"total": 0})
            cadenceStats["total"] += len(cadences)

            if not detailed:
                continue

            # count by sectors
            bySectors = cadenceStats.setdefault("by-sectors", {})
            sectorRegEx = missionSectorRegExes[mission]
            for m, exptime in zip(
                cadences["mission"],
                cadences["exptime"].values
            ):
                sectorMatch = sectorRegEx.search(m)
                if not sectorMatch:
                    raise ValueError(
                        " ".join((
                            "Couldn't extract sector from",
                            f"this mission value: {m}"
                        ))
                    )
                sector = sectorMatch.group(1)
                # the cadence/exptime is saved on the first encounter
                # of the sector (assuming that it is the same
                # for every sector entry)
                sectorStats = bySectors.setdefault(
                    sector,
                    {"exptime": exptime, "count": 0}
                )
                sectorStats["count"] += 1
    return stats


def getLightCurveIDs(
    starName: str
) -> Dict[str, List[str]]:
    """
    Based on available cadence values statistics for a given star,
    get names of missions and cadences, for instance in order to pass
    them to `altaipony.lcio.from_mast()`.

    The selection relies on the totals from `getLightCurveStats()`:

    - `fast` and `short` cadences are taken whenever they have entries;
    - `long` cadence is taken only if neither `fast` nor `short`
      was taken, except for the `K2` mission, where it is taken
      whenever it has entries.

    Returns a dictionary of mission names to lists of cadence names.
    A list can be empty, if all the totals of the mission are 0.

    Raises `ValueError` if no statistics were found for the star.

    Example:

    ``` py
    from uio.utility.databases import lightcurves
    from altaipony.lcio import from_mast

    starName = "LTT 1445 A"
    lightCurveIDs = {}

    try:
        lightCurveIDs = lightcurves.getLightCurveIDs(starName)
    except ValueError as ex:
        print(f"Failed to get light curves missions and cadences. {ex}")
        raise

    for m in lightCurveIDs.keys():
        for c in lightCurveIDs[m]:
            flc = from_mast(
                starName,
                mode="LC",
                cadence=c,
                mission=m
            )
    ```
    """
    stats: Dict[str, Dict] = getLightCurveStats(
        starName,
        detailed=False
    )
    if not stats:
        raise ValueError("Didn't find any results for this star")

    # the order matters, it goes from most important to least important,
    # and in fact long cadence is so not important that it is discarded
    # if there is fast or short cadence available
    cadencePriority = ["fast", "short", "long"]

    lightCurveIDs: Dict[str, List[str]] = {}
    for mission, missionStats in stats.items():
        chosen: List[str] = []
        lightCurveIDs[mission] = chosen
        for rank, cadence in enumerate(cadencePriority):
            # rank > 1 is the long cadence: skip it when something
            # better has already been chosen (K2 is the exception,
            # its long cadence is wanted in any case)
            if chosen and rank > 1 and mission != "K2":
                break
            if cadence not in missionStats:
                continue
            if missionStats[cadence].get("total"):
                chosen.append(cadence)

    return lightCurveIDs
missionSectorRegExes: Dict[str, Pattern] =
{'Kepler': re.compile('^Kepler\\s\\w+\\s(\\d+)$'), 'K2': re.compile('^K2\\s\\w+\\s(\\d+)$'), 'TESS': re.compile('^TESS\\s\\w+\\s(\\d+)$')}
Dictionary of regular expressions for extracting sectors.
def
getLightCurveStats(starName: str, detailed: bool = True) -> Dict[str, Dict]:
def getLightCurveStats(
    starName: str,
    detailed: bool = True
) -> Dict[str, Dict]:
    """
    Gather statistics about available cadence values for a given star.

    Light curves are searched with `lightkurve.search_lightcurve()`
    for all the authors listed in `authors`, and then the found entries
    are counted per mission and per cadence type (`long`, `short`, `fast`).
    Counts from different authors that map to the same mission
    (for example, `SPOC` and `TESS-SPOC` are both `TESS`) are summed up.

    If `detailed` is set to `False`, then the function will skip collecting
    cadence values count by sectors, so resulting statistics will only
    contain total count of values.

    Returns an empty dictionary if the search found nothing. Otherwise
    the result has the following structure (every cadence type defined
    for a found author is present, even if its `total` is 0):

    ``` py
    {
        "TESS": {
            "short": {
                "total": 2,
                "by-sectors": {  # only if `detailed` is `True`
                    "4": {"exptime": 120, "count": 1},
                    "31": {"exptime": 120, "count": 1}
                }
            }
        }
    }
    ```

    Raises `ValueError` if search results contain an author that is not
    listed in `authors`, or if a sector cannot be extracted from
    the mission value of an entry.

    Example:

    ``` py
    from uio.utility.databases import lightcurves

    stats = lightcurves.getLightCurveStats("Kepler-114")
    if not stats:
        print("Didn't find any results for this star")
    else:
        missionName = "Kepler"
        cadenceType = "long"
        sectors = stats.get(missionName, {}).get(cadenceType)
        if sectors is None:
            print(
                f"No [{cadenceType}] cadence sectors by [{missionName}]"
            )
        else:
            print(f"Total amount of sectors: {sectors['total']}")
            for s, info in sectors.get("by-sectors", {}).items():
                print(f"- {s}: {info}")
    ```
    """
    stats: Dict[str, Dict] = {}

    lghtcrvs = lightkurve.search_lightcurve(
        starName,
        author=tuple(authors.keys())
    )
    if len(lghtcrvs) == 0:
        return stats

    tbl: pandas.DataFrame = lghtcrvs.table.to_pandas()[
        ["author", "exptime", "mission"]
    ]
    logger.debug(tbl)

    for author, group in tbl.groupby("author"):
        if author not in authors:
            raise ValueError(f"Unknown author: {author}")
        mission = authors[author]["mission"]
        missionStats = stats.setdefault(mission, {})
        for cadence in ["long", "short", "fast"]:
            if cadence not in authors[author]["cadence"]:
                continue
            cadenceValues: List[int] = (
                authors[author]["cadence"][cadence]
            )
            if len(cadenceValues) > 0:  # take only specified values
                # perhaps both of these should be normalized to int
                cadences = group.query("exptime == @cadenceValues")
            else:  # any value is good
                cadences = group

            # several authors can map to the same mission, so
            # the counters are accumulated instead of being reset
            cadenceStats = missionStats.setdefault(cadence, {"total": 0})
            cadenceStats["total"] += len(cadences)

            if not detailed:
                continue

            # count by sectors
            bySectors = cadenceStats.setdefault("by-sectors", {})
            sectorRegEx = missionSectorRegExes[mission]
            for m, exptime in zip(
                cadences["mission"],
                cadences["exptime"].values
            ):
                sectorMatch = sectorRegEx.search(m)
                if not sectorMatch:
                    raise ValueError(
                        " ".join((
                            "Couldn't extract sector from",
                            f"this mission value: {m}"
                        ))
                    )
                sector = sectorMatch.group(1)
                # the cadence/exptime is saved on the first encounter
                # of the sector (assuming that it is the same
                # for every sector entry)
                sectorStats = bySectors.setdefault(
                    sector,
                    {"exptime": exptime, "count": 0}
                )
                sectorStats["count"] += 1
    return stats
Gather statistics about available cadence values for a given star.
If detailed
is set to False
, then the function will skip collecting
cadence values count by sectors, so resulting statistics will only
contain total count of values.
Example:
from uio.utility.databases import lightcurves
stats = lightcurves.getLightCurveStats("Kepler-114")
if not stats:
print("Didn't find any results for this star")
else:
missionName = "Kepler"
cadenceType = "long"
sectors = stats.get(
missionName,
{}
).get(cadenceType)
if sectors is None:
print(
" ".join((
"There doesn't seem to be any sectors",
f"with [{cadenceType}] cadence by [{missionName}]"
))
)
else:
totalProperty = "total"
sectorsCount = sectors.get(totalProperty)
if sectorsCount is None:
print(
" ".join((
f"For some reason, the [{totalProperty}] property",
f"is missing from the [{cadenceType}] cadence",
f"collection by [{missionName}]"
))
)
else:
print(
" ".join((
f"Total amount of sectors with [{cadenceType}]",
f"cadence by [{missionName}]: {sectorsCount}",
))
)
bySectors = sectors.get("by-sectors")
if bySectors is None:
print(
" ".join((
"For some reason, the [by-sectors] property is missing",
f"from the [{cadenceType}] cadence collection",
f"by [{missionName}]"
))
)
else:
for s in bySectors:
print(f"- {s}: {bySectors[s]}")
def
getLightCurveIDs(starName: str) -> Dict[str, List[str]]:
def getLightCurveIDs(
    starName: str
) -> Dict[str, List[str]]:
    """
    Based on available cadence values statistics for a given star,
    get names of missions and cadences, for instance in order to pass
    them to `altaipony.lcio.from_mast()`.

    The selection relies on the totals from `getLightCurveStats()`:

    - `fast` and `short` cadences are taken whenever they have entries;
    - `long` cadence is taken only if neither `fast` nor `short`
      was taken, except for the `K2` mission, where it is taken
      whenever it has entries.

    Returns a dictionary of mission names to lists of cadence names.
    A list can be empty, if all the totals of the mission are 0.

    Raises `ValueError` if no statistics were found for the star.

    Example:

    ``` py
    from uio.utility.databases import lightcurves
    from altaipony.lcio import from_mast

    starName = "LTT 1445 A"
    lightCurveIDs = {}

    try:
        lightCurveIDs = lightcurves.getLightCurveIDs(starName)
    except ValueError as ex:
        print(f"Failed to get light curves missions and cadences. {ex}")
        raise

    for m in lightCurveIDs.keys():
        for c in lightCurveIDs[m]:
            flc = from_mast(
                starName,
                mode="LC",
                cadence=c,
                mission=m
            )
    ```
    """
    stats: Dict[str, Dict] = getLightCurveStats(
        starName,
        detailed=False
    )
    if not stats:
        raise ValueError("Didn't find any results for this star")

    # the order matters, it goes from most important to least important,
    # and in fact long cadence is so not important that it is discarded
    # if there is fast or short cadence available
    cadencePriority = ["fast", "short", "long"]

    lightCurveIDs: Dict[str, List[str]] = {}
    for mission, missionStats in stats.items():
        chosen: List[str] = []
        lightCurveIDs[mission] = chosen
        for rank, cadence in enumerate(cadencePriority):
            # rank > 1 is the long cadence: skip it when something
            # better has already been chosen (K2 is the exception,
            # its long cadence is wanted in any case)
            if chosen and rank > 1 and mission != "K2":
                break
            if cadence not in missionStats:
                continue
            if missionStats[cadence].get("total"):
                chosen.append(cadence)

    return lightCurveIDs
Based on available cadence values statistics for a given star,
get names of missions and cadences, for instance in order to pass
them to altaipony.lcio.from_mast()
.
Example:
from uio.utility.databases import lightcurves
from altaipony.lcio import from_mast
starName = "LTT 1445 A"
lightCurveIDs = {}
try:
lightCurveIDs = lightcurves.getLightCurveIDs(starName)
except ValueError as ex:
print(f"Failed to get light curves missions and cadences. {ex}")
raise
if not lightCurveIDs:
raise ValueError("Didn't find any results for this star")
#print(lightCurveIDs)
for m in lightCurveIDs.keys():
#print(f"Mission: {m}")
for c in lightCurveIDs[m]:
#print(f"- {c}")
flc = from_mast(
starName,
mode="LC",
cadence=c,
mission=m
)
#print(flc)