# Source code for grblc.search.gcn.parser.sentence

import os
from .constants import sentence_check, mag_check, flux_check


def check_sentence(grb_listing):
    """
    Report whether grb_listing contains a sentence with possible data points.

    Returns True when the sentence_check regex finds a match anywhere in
    grb_listing, and False otherwise.
    """
    # Identity comparison is the correct way to test for None.
    return sentence_check.search(grb_listing) is not None


def _gcn_number_from_line(line):
    """Return the text that follows the ``NUMBER:`` tag in line, trimmed."""
    # partition() removes the literal tag. str.strip("NUMBERS: ") would instead
    # treat its argument as a set of characters to trim from both ends.
    return line.partition("NUMBER:")[2].strip()


def get_final_sentences_txt(grb, output_path):
    """
    Collect the paragraphs of [grb]_all_gcn.txt that contain possible data
    points and write them to [grb]_final_sentences.txt.

    The input file is split into GCNs on the "=+=+...=" separator line and each
    GCN is split into paragraphs on blank lines. A paragraph is kept when one
    of its lines matches the regex sentence_check. Every match of the regex
    mag_check found in a GCN is additionally written as one row of
    [grb]_sentences_mag.txt (columns: gcn, mag, mag_err, band).

    Paths are built as f"{output_path}{grb}/...", so output_path has to end
    with a path separator.

    Returns a list with one dictionary per GCN, each holding the keys
    "number" (text after the "NUMBER:" tag) and "sentences" (the kept
    paragraphs, each followed by a newline).

    As a final step [grb]_sentences.txt is deleted if it exists.
    """

    # Look through all GCNs rather than only [grb]_sentences.txt, because the
    # latter doesn't always catch sentences - Nicole, July 13
    with open(f"{output_path}{grb}/{grb}_all_gcn.txt", 'r') as all_gcn_file:
        text = all_gcn_file.read()

    gcns = text.split('=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=')
    # A list of GCNs, where each GCN is a list of paragraphs.
    gcns = [gcn.split('\n\n') for gcn in gcns]

    all_data = []
    data = {}      # entry of the GCN currently being read; empty until a NUMBER line is seen
    mag_data = {}  # GCN number -> list of mag_check matches

    # PHASE 1: Loop through the paragraphs and store the sentences with data
    # points that we are interested in to all_data.
    for gcn_paragraphs in gcns:

        for paragraph in gcn_paragraphs:
            for line in paragraph.split('\n'):

                if "NUMBER: " in line:
                    # A new GCN starts here: store the previous one first.
                    if data:
                        all_data.append(data)
                    data = {"number": _gcn_number_from_line(line), "sentences": ""}
                    continue

                # Text that precedes the first NUMBER line belongs to no GCN
                # and is skipped instead of raising KeyError.
                if data and sentence_check.search(line):
                    # One matching line is enough: keep the whole paragraph once.
                    data["sentences"] += paragraph + "\n"
                    break

        if not data:
            # No GCN number known yet, so magnitudes cannot be attributed.
            continue

        # Collect the magnitudes of this GCN under its number.
        for paragraph in gcn_paragraphs:
            matches = mag_check.findall(paragraph)
            if matches:
                mag_data.setdefault(data["number"], []).extend(matches)

    # The entry of the last GCN is not followed by another NUMBER line, so it
    # has to be stored explicitly.
    if data:
        all_data.append(data)

    # Write the magnitudes as a tab-separated table.
    with open(f'{output_path}{grb}/{grb}_sentences_mag.txt', 'w+') as mag_file:
        mag_file.write("gcn\tmag\tmag_err\tband\n")
        for gcn_num, entries in mag_data.items():
            for entry in entries:
                # NOTE(review): positions 4, 5 and 7 follow the capture groups
                # of mag_check in .constants - confirm if that regex changes.
                band, mag, mag_err = entry[4], entry[5], entry[7]
                mag_file.write(f"{gcn_num}\t{mag}\t{mag_err}\t{band}\n")

    # PHASE 2: Write the data into *_final_sentences.txt.
    prev_num = None
    with open(f"{output_path}{grb}/{grb}_final_sentences.txt", 'w') as final_file:
        for entry in all_data:
            # If the previous entry and the current entry are in the same GCN,
            # do not print out the header again.
            if prev_num == entry['number']:
                result = ""
            else:
                result = "=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=\n\n"
                result += f"Number: {entry['number']}\n"

            result += entry['sentences'] + "\n"
            prev_num = entry['number']
            final_file.write(result)

    # Remove the original *_sentences.txt. This function does not read that
    # file, so its absence is not treated as an error.
    try:
        os.remove(f"{output_path}{grb}/{grb}_sentences.txt")
    except FileNotFoundError:
        pass

    return all_data


def final_sentences_to_csv(grb, output_path):
    """
    Placeholder: not implemented yet.

    Accepts the same grb and output_path arguments as get_final_sentences_txt,
    does nothing with them and returns None.
    """
    return None