uio.utility.databases.tap
Getting data from various databases via TAP interface.
1""" 2Getting data from various databases 3via [TAP](https://ivoa.net/documents/TAP/) interface. 4""" 5 6# what is available for importing from __init__.py 7# __all__ = [ 8# "services", 9# "queryService", 10# ... 11# ] 12 13import pyvo 14import re 15 16from typing import Optional, Dict, List, Tuple, Any, cast 17 18from ..logs.log import logger 19from ..strings import extraction, conversion 20 21services: Dict[str, Dict] = { 22 "nasa": 23 { 24 # case-sensitive URL 25 "endpoint": "https://exoplanetarchive.ipac.caltech.edu/TAP", 26 "parameters-that-are-strings": 27 [ 28 "st_metratio", 29 "st_spectype" 30 ], 31 "parameters-that-have-errors": 32 [ 33 "period", 34 "pl_massj", 35 "pl_orbper", 36 "pl_orbsmax", 37 "pl_radj", 38 "semi_major_axis" 39 ], 40 "drops-leading-zero-on-cast-to-varchar": True 41 }, 42 "padc": 43 { 44 "endpoint": "http://voparis-tap-planeto.obspm.fr/tap", 45 "drops-leading-zero-on-cast-to-varchar": False 46 }, 47 "gaia": 48 { 49 "endpoint": "https://gea.esac.esa.int/tap-server/tap", 50 "drops-leading-zero-on-cast-to-varchar": False 51 }, 52 "simbad": 53 { 54 "endpoint": "http://simbad.cds.unistra.fr/simbad/sim-tap/sync" 55 # does not support CAST, so no "drops-leading-zero-on-cast-to-varchar" 56 } 57} 58""" 59Dictionary of the most common TAP services. 60""" 61 62mappings: Dict[str, Dict] = { 63 "NASA-to-PADC": 64 { 65 "planets": 66 { 67 "ima_flag": "ima_flag", 68 "pl_massj": "mass", 69 "pl_massjerr1": "mass_error_max", 70 "pl_massjerr2": "mass_error_min", 71 "pl_massjlim": "pl_massjlim", 72 "pl_name": "granule_uid", 73 "pl_orbeccen": "eccentricity", 74 "pl_orbincl": "inclination", 75 "pl_orbper": "period", 76 "pl_orbpererr1": "period_error_max", 77 "pl_orbpererr2": "period_error_min", 78 "pl_orbperlim": "pl_orbperlim", 79 "pl_orbsmax": "semi_major_axis", 80 "pl_orbsmaxerr1": "semi_major_axis_error_max", 81 "pl_orbsmaxerr2": "semi_major_axis_error_min", 82 "pl_orbsmaxlim": "pl_orbsmaxlim", 83 "pl_radj": "radius", 84 "pl_radjerr1": "radius_error_max", 85 "pl_radjerr2": "radius_error_min", 86 "pl_radjlim": "pl_radjlim", 87 "rv_flag": "rv_flag", 88 "tran_flag": "tran_flag", 89 "ttv_flag": "ttv_flag" 90 }, 91 "stars": 92 { 93 "cb_flag": "cb_flag", 94 "hostname": "star_name", 95 "ra": "ra", 96 "st_age": "star_age", 97 "st_lum": "st_lum", 98 "st_mass": "star_mass", 99 "st_met": "star_metallicity", 100 "st_metratio": "st_metratio", 101 "st_rad": "star_radius", 102 "st_rotp": "st_rotp", 103 "st_spectype": "star_spec_type", 104 "st_teff": "star_teff", 105 "sy_dist": "sy_dist", 106 "sy_pnum": "sy_pnum", 107 "sy_snum": "sy_snum" 108 } 109 } 110} 111""" 112Mapping tables columns between different databases. 113""" 114 115 116def getServiceEndpoint(tapServiceName: str) -> str: 117 """ 118 Get TAP service endpoint by the service/database name. 119 120 Example: 121 122 ``` py 123 from uio.utility.databases import tap 124 125 tapServiceEndpoint = tap.getServiceEndpoint("padc") 126 print(tapServiceEndpoint) 127 ``` 128 """ 129 tapService = services.get(tapServiceName) 130 if tapService: 131 tapServiceEndpoint = tapService.get("endpoint") 132 if tapServiceEndpoint: 133 return tapServiceEndpoint 134 else: 135 raise ValueError( 136 f"The [{tapServiceName}] service has no registered endpoint" 137 ) 138 else: 139 raise ValueError( 140 f"There is no TAP service under the name [{tapServiceName}]" 141 ) 142 143 144def escapeSpecialCharactersForAdql(rawQuery: str) -> str: 145 """ 146 Escape certain special characters in ADQL query. For now only escapes 147 a single quote character. 148 149 Example: 150 151 ``` py 152 from uio.utility.databases import tap 153 154 rawQuery = " ".join(( 155 "SELECT oid FROM basic", 156 "WHERE main_id = 'NAME Teegarden's Star'", 157 "AND main_id != 'someone else's star'" 158 )) 159 print(rawQuery) 160 escapedQuery = tap.escapeSpecialCharactersForAdql(rawQuery) 161 print(escapedQuery) 162 ``` 163 """ 164 escapedQuery: str = re.sub( 165 r"('([^']*)(')([^']*)')", 166 r"'\g<2>'\g<3>\g<4>'", 167 rawQuery 168 ) 169 return escapedQuery 170 171 172def queryService( 173 tapEndpoint: str, 174 adqlQuery: str, 175 tryToReExecuteOnFailure: bool = True 176) -> Optional[pyvo.dal.tap.TAPResults]: 177 """ 178 Send [ADQL](https://ivoa.net/documents/ADQL/) request to the TAP service 179 and return results. Those can be then converted to 180 a [Pandas](https://pandas.pydata.org) table. 181 182 Example: 183 184 ``` py 185 from uio.utility.databases import tap 186 187 tbl = tap.queryService( 188 tap.getServiceEndpoint("padc"), 189 " ".join(( 190 "SELECT star_name, granule_uid, mass, radius, period, semi_major_axis", 191 "FROM exoplanet.epn_core", 192 "WHERE star_name = 'Kepler-107'", 193 "ORDER BY granule_uid" 194 )) 195 ) 196 if tbl: 197 print(tbl.to_table().to_pandas()) 198 else: 199 print("No results") 200 ``` 201 """ 202 tapService = pyvo.dal.TAPService(tapEndpoint) 203 logger.debug(f"ADQL query to execute: {adqlQuery}") 204 results = None 205 try: 206 results = tapService.search(adqlQuery) 207 except pyvo.dal.exceptions.DALQueryError as ex: 208 if tryToReExecuteOnFailure: 209 logger.warning( 210 " ".join(( 211 "The query failed, will try to execute again,", 212 "but this time with escaped characters. Original", 213 f"error message: {ex}" 214 )) 215 ) 216 adqlQueryEscaped = escapeSpecialCharactersForAdql(adqlQuery) 217 logger.debug(f"Escaped ADQL query to execute: {adqlQueryEscaped}") 218 results = tapService.search(adqlQueryEscaped) 219 else: 220 raise 221 if results is not None and len(results) > 0: 222 logger.debug(f"Results: {len(results)}") 223 return results 224 else: 225 return None 226 227 228def getParametersThatAreDoubleInNASA() -> List[str]: 229 """ 230 Get the list of parameters names in the NASA `ps` table that have 231 the `double` type. That is needed so you knew when to apply 232 `CAST(PARAMETER_NAME_HERE AS REAL)` in your `SELECT` statements 233 or `CAST(PARAMETER_NAME_HERE AS VARCHAR(30))` in your `WHERE` 234 statements, otherwise NASA returns rounded values by default 235 (according to their `format` value in `tap_schema.columns`), 236 so you might not get the expected results. 237 238 Example: 239 240 ``` py 241 from uio.utility.databases import tap 242 243 doubles = tap.getParametersThatAreDoubleInNASA() 244 print(doubles) 245 ``` 246 """ 247 doubles: List[str] = list() 248 249 results = queryService( 250 getServiceEndpoint("nasa"), 251 " ".join(( 252 f"SELECT column_name", 253 f"FROM tap_schema.columns", 254 f"WHERE table_name = 'ps' AND datatype = 'double'" 255 )) 256 ) 257 if results: 258 doubles = list(results.getcolumn("column_name").flatten()) 259 260 return doubles 261 262 263def getStellarParameterFromNASA( 264 systemName: str, 265 param: str, 266 parameterTypeIsDouble: bool = False 267) -> Optional[Any]: 268 """ 269 Get the latest (*the newest*) published stellar parameter 270 from NASA database. 271 272 The `parameterTypeIsDouble` argument is a workaround for the problem with 273 [inconsistent values](https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#float-values-are-rounded-on-select-but-compared-to-originals-in-where) 274 in `SELECT`/`WHERE`. 275 276 Example: 277 278 ``` py 279 from uio.utility.databases import tap 280 281 doubles = tap.getParametersThatAreDoubleInNASA() 282 param = "st_teff" 283 val = tap.getStellarParameterFromNASA( 284 "Kepler-11", 285 param, 286 parameterTypeIsDouble=(param in doubles) 287 ) 288 print(val) 289 ``` 290 """ 291 results = queryService( 292 getServiceEndpoint("nasa"), 293 " ".join(( 294 # TOP is broken in NASA: https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#top-clause-is-broken 295 ( 296 f"SELECT {param}" 297 if not parameterTypeIsDouble else 298 f"SELECT CAST({param} AS REAL) AS {param}_real" 299 ), 300 f"FROM ps", 301 f"WHERE hostname = '{systemName}' AND {param} IS NOT NULL", 302 "ORDER BY pl_pubdate DESC" 303 )) 304 ) 305 if results: 306 # logger.debug(f"All results for this parameter:\n{results}") 307 return ( 308 results[0].get(param) 309 if not parameterTypeIsDouble else 310 results[0].get(f"{param}_real") 311 ) 312 else: 313 return None 314 315 316def getPlanetaryParameterFromNASA( 317 planetName: str, 318 param: str, 319 parameterTypeIsDouble: bool = False 320) -> Optional[Any]: 321 """ 322 Get the latest (*the newest*) published planetary parameter 323 from NASA database. 324 325 The `parameterTypeIsDouble` argument is a workaround for the problem with 326 [inconsistent values](https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#float-values-are-rounded-on-select-but-compared-to-originals-in-where) 327 in `SELECT`/`WHERE`. 328 329 Example: 330 331 ``` py 332 from uio.utility.databases import tap 333 334 doubles = tap.getParametersThatAreDoubleInNASA() 335 param = "pl_massj" 336 val = tap.getPlanetaryParameterFromNASA( 337 "Kepler-11 b", 338 param, 339 parameterTypeIsDouble=(param in doubles) 340 ) 341 print(val) 342 ``` 343 """ 344 results = queryService( 345 getServiceEndpoint("nasa"), 346 " ".join(( 347 # TOP is broken in NASA: https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#top-clause-is-broken 348 ( 349 f"SELECT {param}" 350 if not parameterTypeIsDouble else 351 f"SELECT CAST({param} AS REAL) AS {param}_real" 352 ), 353 f"FROM ps", 354 f"WHERE pl_name = '{planetName}' AND {param} IS NOT NULL", 355 "ORDER BY pl_pubdate DESC" 356 )) 357 ) 358 if results: 359 # logger.debug(f"All results for this parameter:\n{results}") 360 return ( 361 results[0].get(param) 362 if not parameterTypeIsDouble else 363 results[0].get(f"{param}_real") 364 ) 365 else: 366 return None 367 368 369def getPlanetaryParameterReferenceFromNASA( 370 planetName: str, 371 paramName: str, 372 paramValue: int | float | str, 373 parameterTypeIsDouble: bool = False, 374 tryToReExecuteIfNoResults: bool = True, 375 returnOriginalReferenceOnFailureToExtract: bool = True 376) -> Optional[Any]: 377 """ 378 Get the publication reference for the given planetary parameter value 379 from NASA database. 380 381 The `parameterTypeIsDouble` argument is a workaround for the problem with 382 [inconsistent values](https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#float-values-are-rounded-on-select-but-compared-to-originals-in-where) 383 in `SELECT`/`WHERE`. 384 385 Example: 386 387 ``` py 388 from uio.utility.databases import tap 389 390 doubles = tap.getParametersThatAreDoubleInNASA() 391 param = "pl_massj" 392 val = tap.getPlanetaryParameterReferenceFromNASA( 393 "KOI-4777.01", 394 param, 395 0.31212, 396 parameterTypeIsDouble=(param in doubles) 397 ) 398 print(val) 399 ``` 400 """ 401 fullRefValue: Optional[str] = None 402 403 if tryToReExecuteIfNoResults and not parameterTypeIsDouble: 404 logger.warning( 405 " ".join(( 406 "The re-execution flag is passed, but parameter type", 407 "is not double, so the query will not be re-executed" 408 )) 409 ) 410 411 parameterIsString: bool = False 412 if isinstance(paramValue, str): 413 # logger.debug(f"The {paramName} value {paramValue} is a string") 414 parameterIsString = True 415 results = queryService( 416 getServiceEndpoint("nasa"), 417 " ".join(( 418 f"SELECT pl_refname", # TOP is broken in NASA: https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#top-clause-is-broken 419 f"FROM ps", 420 f"WHERE pl_name = '{planetName}' AND {paramName}", 421 ( 422 f"= {paramValue}" 423 if not parameterIsString else 424 f"= '{paramValue}'" 425 ), 426 "ORDER BY pl_pubdate DESC" 427 )) 428 ) 429 if results: 430 # logger.debug(f"All results:\n{results}") 431 fullRefValue = results[0].get("pl_refname") 432 # might be because of that doubles precision problem, thank you, NASA 433 elif ( 434 results is None 435 and 436 parameterTypeIsDouble 437 and 438 tryToReExecuteIfNoResults 439 ): 440 logger.warning( 441 " ".join(( 442 "The query returned no results, will try to execute again,", 443 "but this time with the parameter casted from double to string" 444 )) 445 ) 446 paramValueLength = len(str(paramValue)) 447 paramValue = cast(float, paramValue) 448 paramValueString = conversion.floatToStringForADQLcastVarchar( 449 paramValue, 450 dropLeadingZero=True 451 ) 452 results = queryService( 453 getServiceEndpoint("nasa"), 454 " ".join(( 455 f"SELECT pl_refname", # TOP is broken in NASA: https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#top-clause-is-broken 456 f"FROM ps", 457 f"WHERE pl_name = '{planetName}'", 458 " ".join(( 459 f"AND CAST({paramName} AS VARCHAR({paramValueLength}))", 460 f"LIKE '{paramValueString}'" 461 )), 462 "ORDER BY pl_pubdate DESC" 463 )) 464 ) 465 if results: 466 # logger.debug(f"All results:\n{results}") 467 fullRefValue = results[0].get("pl_refname") 468 else: 469 return None 470 471 if fullRefValue is not None: 472 ref = extraction.adsRefFromFullReferenceNASA(fullRefValue) 473 if ref is None and returnOriginalReferenceOnFailureToExtract: 474 return fullRefValue 475 return ref 476 else: 477 return None 478 479 480def getParameterFromNASA( 481 systemName: str, 482 planetName: str, 483 param: str, 484 parameterTypeIsDouble: bool = False 485) -> Optional[Any]: 486 """ 487 Get the latest (*the newest*) published parameter from NASA database. 488 The parameter kind (*stellar or planetary*) is determined 489 based on the `uio.utility.databases.tap.mappings` list. This might be 490 convenient when one only has a list of parameters names 491 without specifying which one is of which kind. 492 493 The `parameterTypeIsDouble` argument is a workaround for the problem with 494 [inconsistent values](https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#float-values-are-rounded-on-select-but-compared-to-originals-in-where) 495 in `SELECT`/`WHERE`. 496 497 Example: 498 499 ``` py 500 from uio.utility.databases import tap 501 502 systemName = "Kepler-11" 503 planetName = "Kepler-11 b" 504 params = ["st_teff", "pl_massj"] 505 doubles = tap.getParametersThatAreDoubleInNASA() 506 for p in params: 507 val = tap.getParameterFromNASA( 508 systemName, 509 planetName, 510 p, 511 parameterTypeIsDouble=(p in doubles) 512 ) 513 print(val) 514 ``` 515 """ 516 result = None 517 if param in mappings["NASA-to-PADC"]["stars"]: # get stellar parameter 518 result = getStellarParameterFromNASA( 519 systemName, 520 param, 521 parameterTypeIsDouble 522 ) 523 else: # get planetary parameter 524 result = getPlanetaryParameterFromNASA( 525 planetName, 526 param, 527 parameterTypeIsDouble 528 ) 529 return result 530 531 532def getParameterErrorsFromNASA( 533 systemName: str, 534 planetName: str, 535 param: str 536) -> Tuple[Optional[float], Optional[float]]: 537 """ 538 Get the latest (*the newest*) published stellar or planetary 539 parameter errors from NASA database. This is a convenience function 540 that uses `uio.utility.databases.tap.getParameterFromNASA` 541 to get `PARAMerr2` (*minimum error*) and `PARAMerr1` (*maximum error*). 542 543 Example: 544 545 ``` py 546 from uio.utility.databases import tap 547 548 systemName = "Kepler-11" 549 planetName = "Kepler-11 b" 550 params = ["st_teff", "pl_massj"] 551 for p in params: 552 errMin, errMax = tap.getParameterErrorsFromNASA( 553 systemName, 554 planetName, 555 p 556 ) 557 print(errMin, errMax) 558 ``` 559 """ 560 errMin = getParameterFromNASA(systemName, planetName, f"{param}err2") 561 errMax = getParameterFromNASA(systemName, planetName, f"{param}err1") 562 return errMin, errMax 563 564 565def getParameterFromPADC( 566 planetName: str, 567 param: str 568) -> Optional[Any]: 569 """ 570 Get stellar or planetary parameter from PADC database. 571 572 Example: 573 574 ``` py 575 from uio.utility.databases import tap 576 577 val = tap.getParameterFromPADC("Kepler-11 b", "mass") 578 print(val) 579 ``` 580 """ 581 results = queryService( 582 getServiceEndpoint("padc"), 583 " ".join(( 584 f"SELECT {param}", 585 f"FROM exoplanet.epn_core", 586 f"WHERE granule_uid = '{planetName}' AND {param} IS NOT NULL" 587 )) 588 ) 589 if results: 590 return results[0].get(param) 591 else: 592 return None 593 594 595def getParameterErrorsFromPADC( 596 planetName: str, 597 param: str 598) -> Tuple[Optional[float], Optional[float]]: 599 """ 600 Get stellar or planetary parameter errors from PADC database. 601 This is a convenience function that uses 602 `uio.utility.databases.tap.getParameterFromPADC` to get `PARAM_error_min` 603 and `PARAM_error_max`. 604 605 Example: 606 607 ``` py 608 from uio.utility.databases import tap 609 610 errMin, errMax = tap.getParameterErrorsFromPADC("Kepler-11 b", "mass") 611 print(errMin, errMax) 612 ``` 613 """ 614 errMin = getParameterFromPADC(planetName, f"{param}_error_min") 615 errMax = getParameterFromPADC(planetName, f"{param}_error_max") 616 return errMin, errMax 617 618 619def getStellarParameterFromSimbadByMainID( 620 mainID: str, 621 table: str, 622 param: str, 623) -> Optional[tuple[Any, str]]: 624 """ 625 Get the latest (*the newest*) published stellar parameter from SIMBAD 626 by using the main ID - star name that is chosen to be stored in `main_id` 627 field of the `basic` table. 628 629 Example: 630 631 ``` py 632 from uio.utility.databases import tap 633 634 val, ref = tap.getStellarParameterFromSimbadByMainID( 635 "CD-29 2360", 636 "mesVar", 637 "period" 638 ) 639 print(f"Value: {val}, reference: {ref}") 640 ``` 641 """ 642 results = queryService( 643 getServiceEndpoint("simbad"), 644 " ".join(( 645 f"SELECT TOP 1 v.{param}, v.bibcode", 646 f"FROM {table} AS v", 647 "JOIN basic AS b ON v.oidref = b.oid", 648 f"WHERE b.main_id = '{mainID}' AND {param} IS NOT NULL", 649 "ORDER BY bibcode DESC" 650 )) 651 ) 652 if results: 653 return ( 654 results[0].get(param), 655 results[0].get("bibcode") 656 ) 657 else: 658 return None 659 660 661def getStellarParameterFromSimbadByObjectID( 662 objectID: int, 663 table: str, 664 param: str 665) -> Optional[tuple[Any, str]]: 666 """ 667 Get the latest (*the newest*) published stellar parameter from SIMBAD 668 by using the SIMBAD's object ID. 669 670 If you only have the star name, then first you will need to find 671 the object ID with `uio.utility.databases.simbad.getObjectID`. 672 673 Example: 674 675 ``` py 676 from uio.utility.databases import simbad 677 from uio.utility.databases import tap 678 679 oid = simbad.getObjectID("PPM 725297") 680 if not oid: 681 print("Could not find SIMBAD object ID") 682 else: 683 val, ref = tap.getStellarParameterFromSimbadByObjectID( 684 oid, 685 "mesVar", 686 "period" 687 ) 688 print(f"Value: {val}, reference: {ref}") 689 ``` 690 691 There is also a convenience function `uio.utility.databases.simbad.getStellarParameter`. 692 """ 693 results = queryService( 694 getServiceEndpoint("simbad"), 695 " ".join(( 696 f"SELECT TOP 1 {param}, bibcode", 697 f"FROM {table}", 698 f"WHERE oidref = {objectID} AND {param} IS NOT NULL", 699 "ORDER BY bibcode DESC" 700 )) 701 ) 702 if results: 703 return ( 704 results[0].get(param), 705 results[0].get("bibcode") 706 ) 707 else: 708 return None
Dictionary of the most common TAP services.
Mapping tables columns between different databases.
117def getServiceEndpoint(tapServiceName: str) -> str: 118 """ 119 Get TAP service endpoint by the service/database name. 120 121 Example: 122 123 ``` py 124 from uio.utility.databases import tap 125 126 tapServiceEndpoint = tap.getServiceEndpoint("padc") 127 print(tapServiceEndpoint) 128 ``` 129 """ 130 tapService = services.get(tapServiceName) 131 if tapService: 132 tapServiceEndpoint = tapService.get("endpoint") 133 if tapServiceEndpoint: 134 return tapServiceEndpoint 135 else: 136 raise ValueError( 137 f"The [{tapServiceName}] service has no registered endpoint" 138 ) 139 else: 140 raise ValueError( 141 f"There is no TAP service under the name [{tapServiceName}]" 142 )
Get TAP service endpoint by the service/database name.
Example:
from uio.utility.databases import tap
tapServiceEndpoint = tap.getServiceEndpoint("padc")
print(tapServiceEndpoint)
145def escapeSpecialCharactersForAdql(rawQuery: str) -> str: 146 """ 147 Escape certain special characters in ADQL query. For now only escapes 148 a single quote character. 149 150 Example: 151 152 ``` py 153 from uio.utility.databases import tap 154 155 rawQuery = " ".join(( 156 "SELECT oid FROM basic", 157 "WHERE main_id = 'NAME Teegarden's Star'", 158 "AND main_id != 'someone else's star'" 159 )) 160 print(rawQuery) 161 escapedQuery = tap.escapeSpecialCharactersForAdql(rawQuery) 162 print(escapedQuery) 163 ``` 164 """ 165 escapedQuery: str = re.sub( 166 r"('([^']*)(')([^']*)')", 167 r"'\g<2>'\g<3>\g<4>'", 168 rawQuery 169 ) 170 return escapedQuery
Escape certain special characters in ADQL query. For now only escapes a single quote character.
Example:
from uio.utility.databases import tap
rawQuery = " ".join((
"SELECT oid FROM basic",
"WHERE main_id = 'NAME Teegarden's Star'",
"AND main_id != 'someone else's star'"
))
print(rawQuery)
escapedQuery = tap.escapeSpecialCharactersForAdql(rawQuery)
print(escapedQuery)
173def queryService( 174 tapEndpoint: str, 175 adqlQuery: str, 176 tryToReExecuteOnFailure: bool = True 177) -> Optional[pyvo.dal.tap.TAPResults]: 178 """ 179 Send [ADQL](https://ivoa.net/documents/ADQL/) request to the TAP service 180 and return results. Those can be then converted to 181 a [Pandas](https://pandas.pydata.org) table. 182 183 Example: 184 185 ``` py 186 from uio.utility.databases import tap 187 188 tbl = tap.queryService( 189 tap.getServiceEndpoint("padc"), 190 " ".join(( 191 "SELECT star_name, granule_uid, mass, radius, period, semi_major_axis", 192 "FROM exoplanet.epn_core", 193 "WHERE star_name = 'Kepler-107'", 194 "ORDER BY granule_uid" 195 )) 196 ) 197 if tbl: 198 print(tbl.to_table().to_pandas()) 199 else: 200 print("No results") 201 ``` 202 """ 203 tapService = pyvo.dal.TAPService(tapEndpoint) 204 logger.debug(f"ADQL query to execute: {adqlQuery}") 205 results = None 206 try: 207 results = tapService.search(adqlQuery) 208 except pyvo.dal.exceptions.DALQueryError as ex: 209 if tryToReExecuteOnFailure: 210 logger.warning( 211 " ".join(( 212 "The query failed, will try to execute again,", 213 "but this time with escaped characters. Original", 214 f"error message: {ex}" 215 )) 216 ) 217 adqlQueryEscaped = escapeSpecialCharactersForAdql(adqlQuery) 218 logger.debug(f"Escaped ADQL query to execute: {adqlQueryEscaped}") 219 results = tapService.search(adqlQueryEscaped) 220 else: 221 raise 222 if results is not None and len(results) > 0: 223 logger.debug(f"Results: {len(results)}") 224 return results 225 else: 226 return None
Send ADQL request to the TAP service and return results. Those can be then converted to a Pandas table.
Example:
from uio.utility.databases import tap
tbl = tap.queryService(
tap.getServiceEndpoint("padc"),
" ".join((
"SELECT star_name, granule_uid, mass, radius, period, semi_major_axis",
"FROM exoplanet.epn_core",
"WHERE star_name = 'Kepler-107'",
"ORDER BY granule_uid"
))
)
if tbl:
print(tbl.to_table().to_pandas())
else:
print("No results")
229def getParametersThatAreDoubleInNASA() -> List[str]: 230 """ 231 Get the list of parameters names in the NASA `ps` table that have 232 the `double` type. That is needed so you knew when to apply 233 `CAST(PARAMETER_NAME_HERE AS REAL)` in your `SELECT` statements 234 or `CAST(PARAMETER_NAME_HERE AS VARCHAR(30))` in your `WHERE` 235 statements, otherwise NASA returns rounded values by default 236 (according to their `format` value in `tap_schema.columns`), 237 so you might not get the expected results. 238 239 Example: 240 241 ``` py 242 from uio.utility.databases import tap 243 244 doubles = tap.getParametersThatAreDoubleInNASA() 245 print(doubles) 246 ``` 247 """ 248 doubles: List[str] = list() 249 250 results = queryService( 251 getServiceEndpoint("nasa"), 252 " ".join(( 253 f"SELECT column_name", 254 f"FROM tap_schema.columns", 255 f"WHERE table_name = 'ps' AND datatype = 'double'" 256 )) 257 ) 258 if results: 259 doubles = list(results.getcolumn("column_name").flatten()) 260 261 return doubles
Get the list of parameters names in the NASA ps
table that have
the double
type. That is needed so you knew when to apply
CAST(PARAMETER_NAME_HERE AS REAL)
in your SELECT
statements
or CAST(PARAMETER_NAME_HERE AS VARCHAR(30))
in your WHERE
statements, otherwise NASA returns rounded values by default
(according to their format
value in tap_schema.columns
),
so you might not get the expected results.
Example:
from uio.utility.databases import tap
doubles = tap.getParametersThatAreDoubleInNASA()
print(doubles)
264def getStellarParameterFromNASA( 265 systemName: str, 266 param: str, 267 parameterTypeIsDouble: bool = False 268) -> Optional[Any]: 269 """ 270 Get the latest (*the newest*) published stellar parameter 271 from NASA database. 272 273 The `parameterTypeIsDouble` argument is a workaround for the problem with 274 [inconsistent values](https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#float-values-are-rounded-on-select-but-compared-to-originals-in-where) 275 in `SELECT`/`WHERE`. 276 277 Example: 278 279 ``` py 280 from uio.utility.databases import tap 281 282 doubles = tap.getParametersThatAreDoubleInNASA() 283 param = "st_teff" 284 val = tap.getStellarParameterFromNASA( 285 "Kepler-11", 286 param, 287 parameterTypeIsDouble=(param in doubles) 288 ) 289 print(val) 290 ``` 291 """ 292 results = queryService( 293 getServiceEndpoint("nasa"), 294 " ".join(( 295 # TOP is broken in NASA: https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#top-clause-is-broken 296 ( 297 f"SELECT {param}" 298 if not parameterTypeIsDouble else 299 f"SELECT CAST({param} AS REAL) AS {param}_real" 300 ), 301 f"FROM ps", 302 f"WHERE hostname = '{systemName}' AND {param} IS NOT NULL", 303 "ORDER BY pl_pubdate DESC" 304 )) 305 ) 306 if results: 307 # logger.debug(f"All results for this parameter:\n{results}") 308 return ( 309 results[0].get(param) 310 if not parameterTypeIsDouble else 311 results[0].get(f"{param}_real") 312 ) 313 else: 314 return None
Get the latest (the newest) published stellar parameter from NASA database.
The parameterTypeIsDouble
argument is a workaround for the problem with
inconsistent values
in SELECT
/WHERE
.
Example:
from uio.utility.databases import tap
doubles = tap.getParametersThatAreDoubleInNASA()
param = "st_teff"
val = tap.getStellarParameterFromNASA(
"Kepler-11",
param,
parameterTypeIsDouble=(param in doubles)
)
print(val)
317def getPlanetaryParameterFromNASA( 318 planetName: str, 319 param: str, 320 parameterTypeIsDouble: bool = False 321) -> Optional[Any]: 322 """ 323 Get the latest (*the newest*) published planetary parameter 324 from NASA database. 325 326 The `parameterTypeIsDouble` argument is a workaround for the problem with 327 [inconsistent values](https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#float-values-are-rounded-on-select-but-compared-to-originals-in-where) 328 in `SELECT`/`WHERE`. 329 330 Example: 331 332 ``` py 333 from uio.utility.databases import tap 334 335 doubles = tap.getParametersThatAreDoubleInNASA() 336 param = "pl_massj" 337 val = tap.getPlanetaryParameterFromNASA( 338 "Kepler-11 b", 339 param, 340 parameterTypeIsDouble=(param in doubles) 341 ) 342 print(val) 343 ``` 344 """ 345 results = queryService( 346 getServiceEndpoint("nasa"), 347 " ".join(( 348 # TOP is broken in NASA: https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#top-clause-is-broken 349 ( 350 f"SELECT {param}" 351 if not parameterTypeIsDouble else 352 f"SELECT CAST({param} AS REAL) AS {param}_real" 353 ), 354 f"FROM ps", 355 f"WHERE pl_name = '{planetName}' AND {param} IS NOT NULL", 356 "ORDER BY pl_pubdate DESC" 357 )) 358 ) 359 if results: 360 # logger.debug(f"All results for this parameter:\n{results}") 361 return ( 362 results[0].get(param) 363 if not parameterTypeIsDouble else 364 results[0].get(f"{param}_real") 365 ) 366 else: 367 return None
Get the latest (the newest) published planetary parameter from NASA database.
The parameterTypeIsDouble
argument is a workaround for the problem with
inconsistent values
in SELECT
/WHERE
.
Example:
from uio.utility.databases import tap
doubles = tap.getParametersThatAreDoubleInNASA()
param = "pl_massj"
val = tap.getPlanetaryParameterFromNASA(
"Kepler-11 b",
param,
parameterTypeIsDouble=(param in doubles)
)
print(val)
370def getPlanetaryParameterReferenceFromNASA( 371 planetName: str, 372 paramName: str, 373 paramValue: int | float | str, 374 parameterTypeIsDouble: bool = False, 375 tryToReExecuteIfNoResults: bool = True, 376 returnOriginalReferenceOnFailureToExtract: bool = True 377) -> Optional[Any]: 378 """ 379 Get the publication reference for the given planetary parameter value 380 from NASA database. 381 382 The `parameterTypeIsDouble` argument is a workaround for the problem with 383 [inconsistent values](https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#float-values-are-rounded-on-select-but-compared-to-originals-in-where) 384 in `SELECT`/`WHERE`. 385 386 Example: 387 388 ``` py 389 from uio.utility.databases import tap 390 391 doubles = tap.getParametersThatAreDoubleInNASA() 392 param = "pl_massj" 393 val = tap.getPlanetaryParameterReferenceFromNASA( 394 "KOI-4777.01", 395 param, 396 0.31212, 397 parameterTypeIsDouble=(param in doubles) 398 ) 399 print(val) 400 ``` 401 """ 402 fullRefValue: Optional[str] = None 403 404 if tryToReExecuteIfNoResults and not parameterTypeIsDouble: 405 logger.warning( 406 " ".join(( 407 "The re-execution flag is passed, but parameter type", 408 "is not double, so the query will not be re-executed" 409 )) 410 ) 411 412 parameterIsString: bool = False 413 if isinstance(paramValue, str): 414 # logger.debug(f"The {paramName} value {paramValue} is a string") 415 parameterIsString = True 416 results = queryService( 417 getServiceEndpoint("nasa"), 418 " ".join(( 419 f"SELECT pl_refname", # TOP is broken in NASA: https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#top-clause-is-broken 420 f"FROM ps", 421 f"WHERE pl_name = '{planetName}' AND {paramName}", 422 ( 423 f"= {paramValue}" 424 if not parameterIsString else 425 f"= '{paramValue}'" 426 ), 427 "ORDER BY pl_pubdate DESC" 428 )) 429 ) 430 if results: 431 # logger.debug(f"All results:\n{results}") 432 fullRefValue = results[0].get("pl_refname") 433 # might be because of that doubles precision problem, thank you, NASA 434 elif ( 435 results is None 436 and 437 parameterTypeIsDouble 438 and 439 tryToReExecuteIfNoResults 440 ): 441 logger.warning( 442 " ".join(( 443 "The query returned no results, will try to execute again,", 444 "but this time with the parameter casted from double to string" 445 )) 446 ) 447 paramValueLength = len(str(paramValue)) 448 paramValue = cast(float, paramValue) 449 paramValueString = conversion.floatToStringForADQLcastVarchar( 450 paramValue, 451 dropLeadingZero=True 452 ) 453 results = queryService( 454 getServiceEndpoint("nasa"), 455 " ".join(( 456 f"SELECT pl_refname", # TOP is broken in NASA: https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#top-clause-is-broken 457 f"FROM ps", 458 f"WHERE pl_name = '{planetName}'", 459 " ".join(( 460 f"AND CAST({paramName} AS VARCHAR({paramValueLength}))", 461 f"LIKE '{paramValueString}'" 462 )), 463 "ORDER BY pl_pubdate DESC" 464 )) 465 ) 466 if results: 467 # logger.debug(f"All results:\n{results}") 468 fullRefValue = results[0].get("pl_refname") 469 else: 470 return None 471 472 if fullRefValue is not None: 473 ref = extraction.adsRefFromFullReferenceNASA(fullRefValue) 474 if ref is None and returnOriginalReferenceOnFailureToExtract: 475 return fullRefValue 476 return ref 477 else: 478 return None
Get the publication reference for the given planetary parameter value from NASA database.
The parameterTypeIsDouble
argument is a workaround for the problem with
inconsistent values
in SELECT
/WHERE
.
Example:
from uio.utility.databases import tap
doubles = tap.getParametersThatAreDoubleInNASA()
param = "pl_massj"
val = tap.getPlanetaryParameterReferenceFromNASA(
"KOI-4777.01",
param,
0.31212,
parameterTypeIsDouble=(param in doubles)
)
print(val)
481def getParameterFromNASA( 482 systemName: str, 483 planetName: str, 484 param: str, 485 parameterTypeIsDouble: bool = False 486) -> Optional[Any]: 487 """ 488 Get the latest (*the newest*) published parameter from NASA database. 489 The parameter kind (*stellar or planetary*) is determined 490 based on the `uio.utility.databases.tap.mappings` list. This might be 491 convenient when one only has a list of parameters names 492 without specifying which one is of which kind. 493 494 The `parameterTypeIsDouble` argument is a workaround for the problem with 495 [inconsistent values](https://decovar.dev/blog/2022/02/26/astronomy-databases-tap-adql/#float-values-are-rounded-on-select-but-compared-to-originals-in-where) 496 in `SELECT`/`WHERE`. 497 498 Example: 499 500 ``` py 501 from uio.utility.databases import tap 502 503 systemName = "Kepler-11" 504 planetName = "Kepler-11 b" 505 params = ["st_teff", "pl_massj"] 506 doubles = tap.getParametersThatAreDoubleInNASA() 507 for p in params: 508 val = tap.getParameterFromNASA( 509 systemName, 510 planetName, 511 p, 512 parameterTypeIsDouble=(p in doubles) 513 ) 514 print(val) 515 ``` 516 """ 517 result = None 518 if param in mappings["NASA-to-PADC"]["stars"]: # get stellar parameter 519 result = getStellarParameterFromNASA( 520 systemName, 521 param, 522 parameterTypeIsDouble 523 ) 524 else: # get planetary parameter 525 result = getPlanetaryParameterFromNASA( 526 planetName, 527 param, 528 parameterTypeIsDouble 529 ) 530 return result
Get the latest (the newest) published parameter from NASA database.
The parameter kind (stellar or planetary) is determined
based on the uio.utility.databases.tap.mappings
list. This might be
convenient when one only has a list of parameters names
without specifying which one is of which kind.
The parameterTypeIsDouble
argument is a workaround for the problem with
inconsistent values
in SELECT
/WHERE
.
Example:
from uio.utility.databases import tap
systemName = "Kepler-11"
planetName = "Kepler-11 b"
params = ["st_teff", "pl_massj"]
doubles = tap.getParametersThatAreDoubleInNASA()
for p in params:
val = tap.getParameterFromNASA(
systemName,
planetName,
p,
parameterTypeIsDouble=(p in doubles)
)
print(val)
533def getParameterErrorsFromNASA( 534 systemName: str, 535 planetName: str, 536 param: str 537) -> Tuple[Optional[float], Optional[float]]: 538 """ 539 Get the latest (*the newest*) published stellar or planetary 540 parameter errors from NASA database. This is a convenience function 541 that uses `uio.utility.databases.tap.getParameterFromNASA` 542 to get `PARAMerr2` (*minimum error*) and `PARAMerr1` (*maximum error*). 543 544 Example: 545 546 ``` py 547 from uio.utility.databases import tap 548 549 systemName = "Kepler-11" 550 planetName = "Kepler-11 b" 551 params = ["st_teff", "pl_massj"] 552 for p in params: 553 errMin, errMax = tap.getParameterErrorsFromNASA( 554 systemName, 555 planetName, 556 p 557 ) 558 print(errMin, errMax) 559 ``` 560 """ 561 errMin = getParameterFromNASA(systemName, planetName, f"{param}err2") 562 errMax = getParameterFromNASA(systemName, planetName, f"{param}err1") 563 return errMin, errMax
Get the latest (the newest) published stellar or planetary
parameter errors from NASA database. This is a convenience function
that uses uio.utility.databases.tap.getParameterFromNASA
to get PARAMerr2
(minimum error) and PARAMerr1
(maximum error).
Example:
from uio.utility.databases import tap
systemName = "Kepler-11"
planetName = "Kepler-11 b"
params = ["st_teff", "pl_massj"]
for p in params:
errMin, errMax = tap.getParameterErrorsFromNASA(
systemName,
planetName,
p
)
print(errMin, errMax)
566def getParameterFromPADC( 567 planetName: str, 568 param: str 569) -> Optional[Any]: 570 """ 571 Get stellar or planetary parameter from PADC database. 572 573 Example: 574 575 ``` py 576 from uio.utility.databases import tap 577 578 val = tap.getParameterFromPADC("Kepler-11 b", "mass") 579 print(val) 580 ``` 581 """ 582 results = queryService( 583 getServiceEndpoint("padc"), 584 " ".join(( 585 f"SELECT {param}", 586 f"FROM exoplanet.epn_core", 587 f"WHERE granule_uid = '{planetName}' AND {param} IS NOT NULL" 588 )) 589 ) 590 if results: 591 return results[0].get(param) 592 else: 593 return None
Get stellar or planetary parameter from PADC database.
Example:
from uio.utility.databases import tap
val = tap.getParameterFromPADC("Kepler-11 b", "mass")
print(val)
596def getParameterErrorsFromPADC( 597 planetName: str, 598 param: str 599) -> Tuple[Optional[float], Optional[float]]: 600 """ 601 Get stellar or planetary parameter errors from PADC database. 602 This is a convenience function that uses 603 `uio.utility.databases.tap.getParameterFromPADC` to get `PARAM_error_min` 604 and `PARAM_error_max`. 605 606 Example: 607 608 ``` py 609 from uio.utility.databases import tap 610 611 errMin, errMax = tap.getParameterErrorsFromPADC("Kepler-11 b", "mass") 612 print(errMin, errMax) 613 ``` 614 """ 615 errMin = getParameterFromPADC(planetName, f"{param}_error_min") 616 errMax = getParameterFromPADC(planetName, f"{param}_error_max") 617 return errMin, errMax
Get stellar or planetary parameter errors from PADC database.
This is a convenience function that uses
uio.utility.databases.tap.getParameterFromPADC
to get PARAM_error_min
and PARAM_error_max
.
Example:
from uio.utility.databases import tap
errMin, errMax = tap.getParameterErrorsFromPADC("Kepler-11 b", "mass")
print(errMin, errMax)
620def getStellarParameterFromSimbadByMainID( 621 mainID: str, 622 table: str, 623 param: str, 624) -> Optional[tuple[Any, str]]: 625 """ 626 Get the latest (*the newest*) published stellar parameter from SIMBAD 627 by using the main ID - star name that is chosen to be stored in `main_id` 628 field of the `basic` table. 629 630 Example: 631 632 ``` py 633 from uio.utility.databases import tap 634 635 val, ref = tap.getStellarParameterFromSimbadByMainID( 636 "CD-29 2360", 637 "mesVar", 638 "period" 639 ) 640 print(f"Value: {val}, reference: {ref}") 641 ``` 642 """ 643 results = queryService( 644 getServiceEndpoint("simbad"), 645 " ".join(( 646 f"SELECT TOP 1 v.{param}, v.bibcode", 647 f"FROM {table} AS v", 648 "JOIN basic AS b ON v.oidref = b.oid", 649 f"WHERE b.main_id = '{mainID}' AND {param} IS NOT NULL", 650 "ORDER BY bibcode DESC" 651 )) 652 ) 653 if results: 654 return ( 655 results[0].get(param), 656 results[0].get("bibcode") 657 ) 658 else: 659 return None
Get the latest (the newest) published stellar parameter from SIMBAD
by using the main ID - star name that is chosen to be stored in main_id
field of the basic
table.
Example:
from uio.utility.databases import tap
val, ref = tap.getStellarParameterFromSimbadByMainID(
"CD-29 2360",
"mesVar",
"period"
)
print(f"Value: {val}, reference: {ref}")
662def getStellarParameterFromSimbadByObjectID( 663 objectID: int, 664 table: str, 665 param: str 666) -> Optional[tuple[Any, str]]: 667 """ 668 Get the latest (*the newest*) published stellar parameter from SIMBAD 669 by using the SIMBAD's object ID. 670 671 If you only have the star name, then first you will need to find 672 the object ID with `uio.utility.databases.simbad.getObjectID`. 673 674 Example: 675 676 ``` py 677 from uio.utility.databases import simbad 678 from uio.utility.databases import tap 679 680 oid = simbad.getObjectID("PPM 725297") 681 if not oid: 682 print("Could not find SIMBAD object ID") 683 else: 684 val, ref = tap.getStellarParameterFromSimbadByObjectID( 685 oid, 686 "mesVar", 687 "period" 688 ) 689 print(f"Value: {val}, reference: {ref}") 690 ``` 691 692 There is also a convenience function `uio.utility.databases.simbad.getStellarParameter`. 693 """ 694 results = queryService( 695 getServiceEndpoint("simbad"), 696 " ".join(( 697 f"SELECT TOP 1 {param}, bibcode", 698 f"FROM {table}", 699 f"WHERE oidref = {objectID} AND {param} IS NOT NULL", 700 "ORDER BY bibcode DESC" 701 )) 702 ) 703 if results: 704 return ( 705 results[0].get(param), 706 results[0].get("bibcode") 707 ) 708 else: 709 return None
Get the latest (the newest) published stellar parameter from SIMBAD by using the SIMBAD's object ID.
If you only have the star name, then first you will need to find
the object ID with uio.utility.databases.simbad.getObjectID
.
Example:
from uio.utility.databases import simbad
from uio.utility.databases import tap
oid = simbad.getObjectID("PPM 725297")
if not oid:
print("Could not find SIMBAD object ID")
else:
val, ref = tap.getStellarParameterFromSimbadByObjectID(
oid,
"mesVar",
"period"
)
print(f"Value: {val}, reference: {ref}")
There is also a convenience function uio.utility.databases.simbad.getStellarParameter
.