import requests, re, os, sys
from .config import read_apikey
from .ECHO import SynchronizedEcho
import concurrent.futures, warnings
from ads import SearchQuery
def getArticles(finds, threading=True, debug=False):
    """
    User function to download the bodies of a set of ADS articles.

    :param finds:
        Mapping with the keys ``"articlelist"`` (a list of
        `ads.search.Article`'s to download) and ``"GRB"`` (the GRB name used
        to prefix progress messages). This is the shape `litSearch` returns.
    :type finds:
        :class:`dict`
    :param threading:
        Boolean to specify the use of concurrency.
    :type threading:
        :class:`bool`
    :param debug:
        Passed through to `getArticle` for verbose output.
    :type debug:
        :class:`bool`

    :returns:
        If no articles were supplied, a short "nothing found" message string.
        If the first article's bibcode contains "gcn" (case-insensitive), one
        string with every downloaded body joined by a separator line.
        Otherwise the list of entries appended by `getArticle`.
    """
    papers = finds["articlelist"]
    GRB = finds["GRB"]
    if not papers:
        return r"No articles found! ¯\(°_o)/¯"

    # getArticle returns nothing; it appends each downloaded body to this list.
    articlelist = []
    if threading:
        threads = min(30, len(papers))
        # Leaving the ``with`` block waits for all workers, so no explicit
        # shutdown is needed. Worker exceptions stay on the (discarded)
        # futures, so one failed download does not abort the batch.
        with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
            for paper in papers:
                executor.submit(getArticle, articlelist, paper, GRB, debug=debug)
    else:
        # Call for the side effect only: collecting the return values would
        # replace the accumulated bodies with a list of None.
        for paper in papers:
            getArticle(articlelist, paper, GRB, debug=debug)

    # Count before joining: after the join, len() would be a character count.
    saved = len(articlelist)
    if "gcn" in papers[0].bibcode.lower():
        result = "\n=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=\n\n".join(articlelist)
    else:
        result = articlelist
    ECHO(f"[{GRB}] {saved}/{len(papers)} saved.")
    return result
def prepareGRB(GRB):
    """
    Left-pad the numeric part of a GRB name with zeros to six characters.

    A single trailing alphabetic character is treated as a suffix: it is set
    aside while padding and re-attached afterwards. Names whose non-suffix
    part already has six or more characters are returned unchanged.

    :param GRB:
        GRB name; e.g., '10222A' becomes '010222A'.
    :type GRB:
        :class:`str`

    :returns:
        The zero-padded GRB name.
    """
    has_suffix = GRB[-1].isalpha()
    stem = GRB[:-1] if has_suffix else GRB
    if len(stem) >= 6:
        # Already full length: hand the input back untouched.
        return GRB
    padded = stem.rjust(6, "0")
    if has_suffix:
        padded += GRB[-1]
    return padded
def getGRBComboQuery(GRB):
    """
    Build the name variants of a GRB to look for in an ADS search.

    Two variants are produced: the name as given and the name prefixed with
    "GRB"; e.g., '010222A' gives '010222A OR GRB010222A'.

    :param GRB:
        The GRB to get name combinations of.
    :type GRB:
        :class:`str`

    :returns:
        String of GRB name combinations separated by "OR" for search in ADS.
    """
    bare_name = f"{GRB}"
    prefixed_name = f"GRB{GRB}"
    return f"{bare_name} OR {prefixed_name}"
def additionalKeywords(keywords):
    """
    Turn optional keyword(s) into a full-text clause for an ADS query.

    :param keywords:
        Keywords to specifically search for in addition to the GRB. A single
        value that is not a list or tuple is treated as one keyword.
    :type keywords:
        :class:`list`,`tuple`,`str`

    :returns:
        ``full:(k1 AND k2 ...)`` for the given keyword(s), or an empty string
        when ``keywords`` is ``None`` or an empty list/tuple.
    """
    if keywords is None:
        return ""
    if not isinstance(keywords, (list, tuple)):
        # Wrap a lone keyword so it can be joined like a sequence.
        keywords = (keywords,)
    if not keywords:
        return ""
    return "full:(" + " AND ".join(keywords) + ")"
def gcnSearch(GRB, keywords=None, printlength=True, debug=False):
    """
    User function to find GCNs containing the inputted GRB and optional
    keywords

    :param GRB:
        GRB name; e.g., '010222' or '200205A'
    :type GRB:
        :class:`str`
    :param keywords:
        Keywords to specifically search for in addition to the GRB.
    :type keywords:
        :class:`list`,`tuple`,`str`
    :param printlength:
        Determines whether the user would like the number of articles found to be printed.
    :type printlength:
        :class:`bool`
    :param debug:
        If true, echo the query string that was sent to ADS.
    :type debug:
        :class:`bool`

    :returns:
        A list of `ads.search.Article`'s containing GCNs pertaining to GRB and optional
        keywords.
    """
    if keywords is not None:
        warnings.warn("Keywords aren't working correctly right now.", stacklevel=2)
    assert isinstance(GRB, str), "GRB is not of type string."

    # Group the name variants so the bibstem restriction applies to the whole
    # OR-expression rather than depending on operator precedence.
    query = f"bibstem:GCN ({getGRBComboQuery(GRB)})"
    keywordquery = additionalKeywords(keywords)
    if keywordquery:
        # A separating space is required; without it the keyword clause is
        # glued onto the last GRB name (e.g. "GRB010222full:(...)").
        query = f"{query} {keywordquery}"

    finds = list(SearchQuery(q=query, fl=["bibcode", "identifier"]))
    if debug:
        ECHO(f"[{GRB}] Query: {query}")
    if printlength:
        ECHO(f"[{GRB}] {len(finds)} candidates.")
    return finds
def litSearch(GRB, keywords=None, printlength=True, debug=False):
    """
    User function to find literature (excluding GCNs) mentioning the inputted
    GRB and optional keywords

    :param GRB:
        GRB name; e.g., '010222' or '200205A'. It is zero-padded with
        `prepareGRB` before searching.
    :type GRB:
        :class:`str`
    :param keywords:
        Keywords to specifically search for in addition to the GRB.
    :type keywords:
        :class:`list`,`tuple`,`str`
    :param printlength:
        Determines whether the user would like the number of articles found to be printed.
    :type printlength:
        :class:`bool`
    :param debug:
        If true, echo the query string and the bibcodes that were found.
    :type debug:
        :class:`bool`

    :returns:
        A :class:`dict` with the keys ``"GRB"`` (the zero-padded GRB name) and
        ``"articlelist"`` (a list of `ads.search.Article`'s, at most 100
        rows requested).
    """
    assert isinstance(GRB, str), "GRB is not of type string."
    GRB = prepareGRB(GRB)
    query = getGRBComboQuery(GRB)
    keywordquery = additionalKeywords(keywords)

    # ``query`` is itself an OR-expression, so it must be parenthesised for
    # the field prefix to cover every name variant, not just the first one.
    fieldquery = " OR ".join(
        f"{field}:({query})" for field in ("title", "abstract", "keyword")
    )
    # Group the field alternatives so the keyword clause and the GCN
    # exclusion apply to all of them.
    fullquery = f"({fieldquery})"
    if keywordquery:
        fullquery = f"{fullquery} {keywordquery}"
    fullquery = f"{fullquery} -bibstem:GCN"

    finds = list(SearchQuery(q=fullquery, fl=["bibcode", "identifier", "title", "author", "year"], rows=100))
    if (printlength or debug) and len(finds) > 0:
        ECHO(f"[{GRB}] {len(finds)} found.")
    if debug:
        ECHO(f"[{GRB}] Query: '{fullquery}'")
        ECHO(f"Finds: {', '.join([find.bibcode for find in finds])}")
    return {"GRB": GRB, "articlelist": finds}
def _is_usable_pdf_link(linktype, link):
    """Return True if the link type mentions "PDF" and the link contains neither "doi.org" nor "$"."""
    return "PDF" in linktype and "doi.org" not in link and "$" not in link


def _to_export_arxiv(link):
    """Point an arxiv.org link at export.arxiv.org, leaving export links untouched."""
    if "export.arxiv.org" in link:
        return link
    return link.replace("arxiv.org", "export.arxiv.org")


def _select_pdf_url(deserialized, bibcode, GRB):
    """Pick a PDF URL from a decoded resolver response; echo a message and return None if there is none."""
    try:
        records = deserialized["links"]["records"]
    except (KeyError, TypeError):
        records = None

    if records is not None:
        # Multi-link response: take the first usable record.
        link = ""
        for record in records:
            linktype = record.get("link_type", "")
            link = record.get("url", "")
            if _is_usable_pdf_link(linktype, link):
                return _to_export_arxiv(link)
        ECHO(f"[{GRB}] Could not find suitable link for {bibcode}. {link}")
        return None

    # Single-link response: the link sits at the top level of the payload.
    linktype = deserialized.get("link_type", "")
    link = deserialized.get("link", "")
    if _is_usable_pdf_link(linktype, link):
        return _to_export_arxiv(link)
    ECHO(f"[{GRB}] Pass 2: No suitable link for {bibcode}. {link}")
    return None


def getArticle(articlelist, article, GRB, firsttry=True, debug=False):
    """
    Download an article from arXiv or other sources.

    :param articlelist:
        The list to append the downloaded article to. For a GCN the response
        text is appended; otherwise a list
        ``[content, article.title, article.year, url]`` is appended.
    :type articlelist:
        :class:`list`
    :param article:
        The ADS article to retrieve.
    :type article:
        :class:`ads.search.Article`
    :param GRB:
        GRB name, used only to prefix progress and error messages.
    :type GRB:
        :class:`str`
    :param firsttry:
        Whether this is the first attempt. An HTML or empty response from an
        arXiv URL triggers one retry with ``firsttry=False``.
    :type firsttry:
        :class:`bool`
    :param debug:
        If true, echo progress and raise on HTTP errors instead of returning
        quietly.
    :type debug:
        :class:`bool`

    :returns:
        Nothing. Side effect of appending text of article body to articlelist.

    Modified from https://github.com/andycasey/ads/blob/master/examples/monthly-institute-publications/stromlo.py#22
    """
    if debug:
        ECHO(f"[{GRB}] Retrieving {article.bibcode}")
    isGCN = "GCN" in article.bibcode
    header = {"Authorization": f"Bearer {read_apikey()}"}

    if isGCN:
        # Ask ADS to redirect us to the journal article.
        params = {"bibcode": article.bibcode, "link_type": "EJOURNAL"}
        url = requests.get("http://adsabs.harvard.edu/cgi-bin/nph-data_query", params=params).url
        q = requests.get(url)
    else:
        url = f"https://api.adsabs.harvard.edu/v1/resolver/{article.bibcode}/esource"
        q = requests.get(
            url,
            headers=header,
            allow_redirects=False,
        )

    # Pass 1: the initial request (the GCN page, or the resolver lookup).
    if not q.ok:
        if debug:
            ECHO(f"[{GRB}] Pass 1: Error retrieving {article.bibcode} ({q.status_code}): https://ui.adsabs.harvard.edu/abs/{article.bibcode}/abstract.")
            q.raise_for_status()
        return

    if not isGCN:
        # The resolver answers with JSON describing one or more links; the
        # GCN request above already returned the body itself.
        url = _select_pdf_url(q.json(), article.bibcode, GRB)
        if url is None:
            return
        pdf_header = {"user-agent": f"adsgrb/{__version__}"}
        q = requests.get(url, stream=True, headers=pdf_header)

        # Pass 2: the actual download.
        if not q.ok:
            ECHO(f"[{GRB}] Pass 2: Error retrieving {article.bibcode} ({q.status_code}): {url}")
            if debug:
                q.raise_for_status()
            return

    # Check if the journal has given back forbidden HTML.
    # Compare bytes with bytes: q.content is bytes on Python 3.
    body = q.content
    if not body or b"</html>" in body.lower():
        ECHO(f"[{GRB}] Pass 2: Error retrieving {article.bibcode} (200): {url}")
        if firsttry and "arxiv" in url:
            ECHO(f"[{GRB}] Pass 2: Trying again for {article.bibcode}")
            getArticle(articlelist, article, GRB, firsttry=False, debug=debug)
        # Never append the rejected body; a successful retry appends its own.
        return

    if isGCN:
        articlelist.append(q.text)
    else:
        articlelist.append([body, article.title, article.year, url])
# Module-wide echo object; every progress and error message in this module
# is emitted by calling it.
ECHO = SynchronizedEcho()

# Unpack the interpreter version so readfile_kwargs can depend on it.
major, minor1, minor2, release, serial = sys.version_info

# Keyword arguments for open() in readfile(): an explicit UTF-8 encoding is
# passed on Python 3 and later, and nothing extra otherwise.
readfile_kwargs = {}
if major >= 3:
    readfile_kwargs["encoding"] = "utf-8"
def readfile(filename):
    """
    Read a whole file and return its contents.

    :param filename:
        Path of the file to read. It is opened with the module-level
        ``readfile_kwargs`` as extra keyword arguments.

    :returns:
        Everything ``read()`` returns for the opened file.
    """
    with open(filename, **readfile_kwargs) as handle:
        return handle.read()
# Take the version from the __init__.py next to this file by reading it as
# text and matching its ``__version__ = "..."`` line.
version_regex = re.compile('__version__ = "(.*?)"')
_package_dir = os.path.dirname(os.path.abspath(__file__))
contents = readfile(os.path.join(_package_dir, "__init__.py"))
__version__ = version_regex.findall(contents)[0]