tasks.reconfirming_stellar_parameters
Reconfirming stellar parameters, enriching original datasets with additional data from various data sources.
"""
Reconfirming stellar parameters, enriching original datasets
with additional data from various data sources.
"""

import pandas
import numpy
# import json

from typing import Optional, List, Dict

try:
    from ..utils.files import pickle
    from ..utils.databases import tap, simbad
except ImportError:
    # the installed package needs the relative imports above, but
    # generating documentation with pdoc needs them to be done
    # a different way, hence this fallback
    from utils.files import pickle
    from utils.databases import tap, simbad


def lookForParametersInGaia(
    originalTable: pandas.DataFrame,
    adqlTable: str,
    adqlParameters: List[str],
    simbadIDversion: Optional[str] = None
) -> pandas.DataFrame:
    """
    Looking for specified parameters in GAIA database:

    1. Takes the original [Pandas](https://pandas.pydata.org) table
    (*for example, one opened from a pickle file*);
    2. Extracts unique list of star names from its `star_name` column;
    3. Gets their GAIA IDs from SIMBAD database;
    4. Queries GAIA database for given parameters;
    5. Adds found parameters to the original table as new columns.

    Example:

    ``` py
    from phab.utils.files import pickle
    from phab.tasks import reconfirming_stellar_parameters

    originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
    tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
        originalTable,
        "gaiadr3.astrophysical_parameters",
        [
            "age_flame",
            "logg_gspphot",
            "mass_flame",
            "mh_gspphot",
            "mh_gspspec",
            "radius_flame",
            "radius_gspphot",
            "teff_esphs",
            "teff_espucd",
            "teff_gspphot",
            "teff_gspspec",
            "teff_msc1",
            "ew_espels_halpha",
            "ew_espels_halpha_model"
        ],
        "dr3"
    )
    ```

    You might need to provide `simbadIDversion` parameter (*the `dr3` value
    here*) if SIMBAD (`utils.databases.simbad.findIdentificatorFromAnotherCatalogue`)
    returns IDs like `DR3 2135237601028549888` and you need to get exactly
    the DR3 ones.

    As a result, the returned table `tbl` (*which is the very same object
    as `originalTable`, modified in place*) will be enriched with additional
    columns according to the list of provided astrophysical parameters.
    Every such column is first filled with NaN (*an already existing column
    with the same name gets overwritten*), so the stars without a GAIA ID
    or without GAIA records keep NaN values there.
    """

    starNames = originalTable["star_name"].unique()

    print("\nGetting GAIA IDs from SIMBAD...\n")

    # only the stars with a resolved GAIA ID get here, so GAIA
    # is never queried with an empty identifier
    stars: Dict[str, str] = {}
    for star in starNames:
        oid = simbad.findIdentificatorFromAnotherCatalogue(
            star,
            "gaia",
            simbadIDversion
        )
        if oid is None:
            print(f"- [WARNING] did not find GAIA ID for [{star}]")
            continue
        print(f"- found GAIA ID for [{star}]: {oid}")
        stars[star] = oid

    # print(json.dumps(stars, indent=4))

    print("\nLooking for parameters in GAIA...\n")

    for parameter in adqlParameters:
        originalTable[parameter] = numpy.nan

    # this part of the query is the same for every star
    queryBase = f"SELECT {', '.join(adqlParameters)} FROM {adqlTable}"

    foundCnt = 0
    for star, gaiaID in stars.items():
        print(f"- {star} | {gaiaID}...")
        resultsGAIA = tap.queryService(
            tap.getServiceEndpoint("gaia"),
            f"{queryBase} WHERE source_id = {gaiaID}"
        )
        tbl = (
            None if resultsGAIA is None
            else resultsGAIA.to_table().to_pandas()
        )
        # a result without any rows is the same as no result at all,
        # there would be nothing to take the values from
        if tbl is None or tbl.empty:
            print(f"- [WARNING] did not find anything in GAIA for [{gaiaID}]")
            continue
        foundCnt += 1
        if len(tbl) > 1:
            print(
                " ".join((
                    "- [WARNING] GAIA has more than one record",
                    f"for ID [{gaiaID}], will take only the first one"
                ))
            )
        # add found values to the new columns in the original table,
        # the same star can occupy several rows there
        starRows = originalTable["star_name"] == star
        for parameter in adqlParameters:
            # positional access, does not depend on the index labels
            originalTable.loc[starRows, parameter] = tbl[parameter].iloc[0]

    print(f"\nFound parameters for {foundCnt}/{len(stars)} stars\n")

    return originalTable
def
lookForParametersInGaia( originalTable: pandas.core.frame.DataFrame, adqlTable: str, adqlParameters: List[str], simbadIDversion: Optional[str] = None) -> pandas.core.frame.DataFrame:
def lookForParametersInGaia(
    originalTable: pandas.DataFrame,
    adqlTable: str,
    adqlParameters: List[str],
    simbadIDversion: Optional[str] = None
) -> pandas.DataFrame:
    """
    Looking for specified parameters in GAIA database:

    1. Takes the original [Pandas](https://pandas.pydata.org) table
    (*for example, one opened from a pickle file*);
    2. Extracts unique list of star names from its `star_name` column;
    3. Gets their GAIA IDs from SIMBAD database;
    4. Queries GAIA database for given parameters;
    5. Adds found parameters to the original table as new columns.

    Example:

    ``` py
    from phab.utils.files import pickle
    from phab.tasks import reconfirming_stellar_parameters

    originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
    tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
        originalTable,
        "gaiadr3.astrophysical_parameters",
        [
            "age_flame",
            "logg_gspphot",
            "mass_flame",
            "mh_gspphot",
            "mh_gspspec",
            "radius_flame",
            "radius_gspphot",
            "teff_esphs",
            "teff_espucd",
            "teff_gspphot",
            "teff_gspspec",
            "teff_msc1",
            "ew_espels_halpha",
            "ew_espels_halpha_model"
        ],
        "dr3"
    )
    ```

    You might need to provide `simbadIDversion` parameter (*the `dr3` value
    here*) if SIMBAD (`utils.databases.simbad.findIdentificatorFromAnotherCatalogue`)
    returns IDs like `DR3 2135237601028549888` and you need to get exactly
    the DR3 ones.

    As a result, the returned table `tbl` (*which is the very same object
    as `originalTable`, modified in place*) will be enriched with additional
    columns according to the list of provided astrophysical parameters.
    Every such column is first filled with NaN (*an already existing column
    with the same name gets overwritten*), so the stars without a GAIA ID
    or without GAIA records keep NaN values there.
    """

    starNames = originalTable["star_name"].unique()

    print("\nGetting GAIA IDs from SIMBAD...\n")

    # only the stars with a resolved GAIA ID get here, so GAIA
    # is never queried with an empty identifier
    stars: Dict[str, str] = {}
    for star in starNames:
        oid = simbad.findIdentificatorFromAnotherCatalogue(
            star,
            "gaia",
            simbadIDversion
        )
        if oid is None:
            print(f"- [WARNING] did not find GAIA ID for [{star}]")
            continue
        print(f"- found GAIA ID for [{star}]: {oid}")
        stars[star] = oid

    # print(json.dumps(stars, indent=4))

    print("\nLooking for parameters in GAIA...\n")

    for parameter in adqlParameters:
        originalTable[parameter] = numpy.nan

    # this part of the query is the same for every star
    queryBase = f"SELECT {', '.join(adqlParameters)} FROM {adqlTable}"

    foundCnt = 0
    for star, gaiaID in stars.items():
        print(f"- {star} | {gaiaID}...")
        resultsGAIA = tap.queryService(
            tap.getServiceEndpoint("gaia"),
            f"{queryBase} WHERE source_id = {gaiaID}"
        )
        tbl = (
            None if resultsGAIA is None
            else resultsGAIA.to_table().to_pandas()
        )
        # a result without any rows is the same as no result at all,
        # there would be nothing to take the values from
        if tbl is None or tbl.empty:
            print(f"- [WARNING] did not find anything in GAIA for [{gaiaID}]")
            continue
        foundCnt += 1
        if len(tbl) > 1:
            print(
                " ".join((
                    "- [WARNING] GAIA has more than one record",
                    f"for ID [{gaiaID}], will take only the first one"
                ))
            )
        # add found values to the new columns in the original table,
        # the same star can occupy several rows there
        starRows = originalTable["star_name"] == star
        for parameter in adqlParameters:
            # positional access, does not depend on the index labels
            originalTable.loc[starRows, parameter] = tbl[parameter].iloc[0]

    print(f"\nFound parameters for {foundCnt}/{len(stars)} stars\n")

    return originalTable
Looking for specified parameters in GAIA database:
- Takes the original Pandas table (for example, one opened from a pickle file);
- Extracts unique list of star names;
- Gets their GAIA IDs from Simbad database;
- Queries GAIA database for given parameters;
- Adds found parameters to the original table as new columns.
Example:
from phab.utils.files import pickle
from phab.tasks import reconfirming_stellar_parameters
originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
originalTable,
"gaiadr3.astrophysical_parameters",
[
"age_flame",
"logg_gspphot",
"mass_flame",
"mh_gspphot",
"mh_gspspec",
"radius_flame",
"radius_gspphot",
"teff_esphs",
"teff_espucd",
"teff_gspphot",
"teff_gspspec",
"teff_msc1",
"ew_espels_halpha",
"ew_espels_halpha_model"
],
"dr3"
)
You might need to provide the `simbadIDversion` parameter (the `dr3` value here) if SIMBAD (`utils.databases.simbad.findIdentificatorFromAnotherCatalogue`) returns IDs like `DR3 2135237601028549888` and you need to get exactly the DR3 ones.
As a result, the returned table `tbl` (the same object as your original table, which is modified in place) will be enriched with additional columns according to the list of provided astrophysical parameters.