tasks.reconfirming_stellar_parameters
Reconfirming stellar parameters, enriching original datasets with additional data from various data sources.
"""
Reconfirming stellar parameters, enriching original datasets
with additional data from various data sources.
"""

import pandas
import numpy
# import json

from typing import Optional, List, Dict

try:
    from ..utils.databases import tap, simbad
except ImportError:
    # using the installed package requires the relative import above,
    # while generating documentation with pdoc requires this different
    # (non-relative) form of the same import
    from utils.databases import tap, simbad


def lookForParametersInGaia(
    originalTable: pandas.DataFrame,
    adqlTable: str,
    adqlParameters: List[str],
    simbadIDversion: Optional[str] = None
) -> pandas.DataFrame:
    """
    Looking for specified parameters in GAIA database:

    1. Takes the original [Pandas](https://pandas.pydata.org) table
    (*for example, one opened from a pickle file*);
    2. Extracts unique list of star names from its `star_name` column;
    3. Gets their GAIA IDs from Simbad database;
    4. Queries GAIA database for given parameters;
    5. Adds found parameters to the original table as new columns.

    Example:

    ``` py
    from phab.utils.files import pickle
    from phab.tasks import reconfirming_stellar_parameters

    originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
    tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
        originalTable,
        "gaiadr3.astrophysical_parameters",
        [
            "age_flame",
            "logg_gspphot",
            "mass_flame",
            "mh_gspphot",
            "mh_gspspec",
            "radius_flame",
            "radius_gspphot",
            "teff_esphs",
            "teff_espucd",
            "teff_gspphot",
            "teff_gspspec",
            "teff_msc1",
            "ew_espels_halpha",
            "ew_espels_halpha_model"
        ],
        "dr3"
    )
    ```

    You might need to provide `simbadIDversion` parameter (*the `dr3` value
    here*) if SIMBAD (`utils.databases.simbad.findIdentificatorFromAnotherCatalogue`)
    returns IDs like `DR3 2135237601028549888` and you need to get exactly
    the DR3 ones.

    As a result, the original table is modified in place and returned
    (*so `tbl` and `originalTable` are the same object*), enriched with
    additional columns according to the list of provided astrophysical
    parameters. Rows of the stars for which no GAIA ID or no GAIA record
    was found keep `NaN` in those columns.
    """

    starNames = originalTable["star_name"].unique()

    print("\nGetting GAIA IDs from SIMBAD...\n")

    # only the stars with a resolved GAIA ID are stored here
    stars: Dict[str, str] = {}
    for star in starNames:
        oid = simbad.findIdentificatorFromAnotherCatalogue(
            star,
            "gaia",
            simbadIDversion
        )
        if oid is None:
            print(f"- [WARNING] did not GAIA ID for [{star}]")
        else:
            print(f"- found GAIA ID for [{star}]: {oid}")
            stars[star] = oid

    # print(json.dumps(stars, indent=4))

    print("\nLooking for parameters in GAIA...\n")

    # new columns start as NaN, so stars without results stay "empty"
    for parameter in adqlParameters:
        originalTable[parameter] = numpy.nan

    # this part of the query is the same for every star
    queryPrefix = " ".join((
        f"SELECT {', '.join(adqlParameters)}",
        f"FROM {adqlTable}"
    ))

    foundCnt = 0
    for star, gaiaID in stars.items():
        print(f"- {star} | {gaiaID}...")
        resultsGAIA = tap.queryService(
            tap.getServiceEndpoint("gaia"),
            f"{queryPrefix} WHERE source_id = {gaiaID}"
        )
        tbl = (
            None if resultsGAIA is None
            else resultsGAIA.to_table().to_pandas()
        )
        # a result without any rows is treated the same as no result,
        # otherwise taking the first record below would raise
        if tbl is None or tbl.empty:
            print(f"- [WARNING] did not found anything in GAIA for [{gaiaID}]")
            continue

        foundCnt += 1
        if len(tbl) > 1:
            print(
                " ".join((
                    "- [WARNING] GAIA has more than one record",
                    f"for ID [{gaiaID}], will take only the first one"
                ))
            )
        # add found values to the new columns in the original table,
        # for all the rows that belong to this star
        starRows = originalTable["star_name"] == star
        for parameter in adqlParameters:
            # positional access, does not depend on the index labels
            originalTable.loc[starRows, parameter] = tbl[parameter].iloc[0]

    print(f"\nFound parameters for {foundCnt}/{len(stars)} stars\n")

    return originalTable
def
lookForParametersInGaia( originalTable: pandas.core.frame.DataFrame, adqlTable: str, adqlParameters: List[str], simbadIDversion: Optional[str] = None) -> pandas.core.frame.DataFrame:
def lookForParametersInGaia(
    originalTable: pandas.DataFrame,
    adqlTable: str,
    adqlParameters: List[str],
    simbadIDversion: Optional[str] = None
) -> pandas.DataFrame:
    """
    Looking for specified parameters in GAIA database:

    1. Takes the original [Pandas](https://pandas.pydata.org) table
    (*for example, one opened from a pickle file*);
    2. Extracts unique list of star names from its `star_name` column;
    3. Gets their GAIA IDs from Simbad database;
    4. Queries GAIA database for given parameters;
    5. Adds found parameters to the original table as new columns.

    Example:

    ``` py
    from phab.utils.files import pickle
    from phab.tasks import reconfirming_stellar_parameters

    originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
    tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
        originalTable,
        "gaiadr3.astrophysical_parameters",
        [
            "age_flame",
            "logg_gspphot",
            "mass_flame",
            "mh_gspphot",
            "mh_gspspec",
            "radius_flame",
            "radius_gspphot",
            "teff_esphs",
            "teff_espucd",
            "teff_gspphot",
            "teff_gspspec",
            "teff_msc1",
            "ew_espels_halpha",
            "ew_espels_halpha_model"
        ],
        "dr3"
    )
    ```

    You might need to provide `simbadIDversion` parameter (*the `dr3` value
    here*) if SIMBAD (`utils.databases.simbad.findIdentificatorFromAnotherCatalogue`)
    returns IDs like `DR3 2135237601028549888` and you need to get exactly
    the DR3 ones.

    As a result, the original table is modified in place and returned
    (*so `tbl` and `originalTable` are the same object*), enriched with
    additional columns according to the list of provided astrophysical
    parameters. Rows of the stars for which no GAIA ID or no GAIA record
    was found keep `NaN` in those columns.
    """

    starNames = originalTable["star_name"].unique()

    print("\nGetting GAIA IDs from SIMBAD...\n")

    # only the stars with a resolved GAIA ID are stored here
    stars: Dict[str, str] = {}
    for star in starNames:
        oid = simbad.findIdentificatorFromAnotherCatalogue(
            star,
            "gaia",
            simbadIDversion
        )
        if oid is None:
            print(f"- [WARNING] did not GAIA ID for [{star}]")
        else:
            print(f"- found GAIA ID for [{star}]: {oid}")
            stars[star] = oid

    # print(json.dumps(stars, indent=4))

    print("\nLooking for parameters in GAIA...\n")

    # new columns start as NaN, so stars without results stay "empty"
    for parameter in adqlParameters:
        originalTable[parameter] = numpy.nan

    # this part of the query is the same for every star
    queryPrefix = " ".join((
        f"SELECT {', '.join(adqlParameters)}",
        f"FROM {adqlTable}"
    ))

    foundCnt = 0
    for star, gaiaID in stars.items():
        print(f"- {star} | {gaiaID}...")
        resultsGAIA = tap.queryService(
            tap.getServiceEndpoint("gaia"),
            f"{queryPrefix} WHERE source_id = {gaiaID}"
        )
        tbl = (
            None if resultsGAIA is None
            else resultsGAIA.to_table().to_pandas()
        )
        # a result without any rows is treated the same as no result,
        # otherwise taking the first record below would raise
        if tbl is None or tbl.empty:
            print(f"- [WARNING] did not found anything in GAIA for [{gaiaID}]")
            continue

        foundCnt += 1
        if len(tbl) > 1:
            print(
                " ".join((
                    "- [WARNING] GAIA has more than one record",
                    f"for ID [{gaiaID}], will take only the first one"
                ))
            )
        # add found values to the new columns in the original table,
        # for all the rows that belong to this star
        starRows = originalTable["star_name"] == star
        for parameter in adqlParameters:
            # positional access, does not depend on the index labels
            originalTable.loc[starRows, parameter] = tbl[parameter].iloc[0]

    print(f"\nFound parameters for {foundCnt}/{len(stars)} stars\n")

    return originalTable
Looking for specified parameters in GAIA database:
- Takes the original Pandas table (for example, one opened from a pickle file);
- Extracts unique list of star names;
- Gets their GAIA IDs from Simbad database;
- Queries GAIA database for given parameters;
- Adds found parameters to the original table as new columns.
Example:
from phab.utils.files import pickle
from phab.tasks import reconfirming_stellar_parameters
originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
originalTable,
"gaiadr3.astrophysical_parameters",
[
"age_flame",
"logg_gspphot",
"mass_flame",
"mh_gspphot",
"mh_gspspec",
"radius_flame",
"radius_gspphot",
"teff_esphs",
"teff_espucd",
"teff_gspphot",
"teff_gspspec",
"teff_msc1",
"ew_espels_halpha",
"ew_espels_halpha_model"
],
"dr3"
)
You might need to provide simbadIDversion parameter (the dr3 value
here) if SIMBAD (utils.databases.simbad.findIdentificatorFromAnotherCatalogue)
returns IDs like DR3 2135237601028549888 and you need to get exactly
the DR3 ones.
As a result, your original table (returned as tbl) will be enriched with additional
columns according to the list of provided astrophysical parameters.