Source code for grblc.search.gcn.parser.table

import re
import os
from .constants import dash_line, table_entry_check, sentence_check


[docs]def check_table(grb_listing): """ Return if there is any tables in grb_listing. """ # Lines of the grb_listing lines = grb_listing.split('\n') lines = [line for line in lines if line and not line.isspace()] # filter empty lines # The bool for checking if previous line is also matched as a table entry prev = False for line in lines: # First, check if there is a dash line. # Usually when there is a dash line, there is a table. match = dash_line.search(line) if match: return True # Second, check if there is a group of at least two table entries. table_match = table_entry_check.search(line) sentence_match = sentence_check.search(line) duplicate = table_match and sentence_match if table_match and not duplicate and prev: return True prev = not duplicate return False
[docs]def get_final_tables_txt(grb, output_path): """ Fetch the data from the generated text file, *_table.txt. Store gcn numbers, dates, and tables and write the data accordingly into *_final.txt. """ # Get the lines of the text file. lines = open(f"{output_path}{grb}/{grb}_table.txt", 'r').read().split('\n') lines = [line for line in lines if line and not line.isspace()] # filter empty lines allData = [] data = {} # PHASE 1: Loop through the lines and store the data we are interested in to allData. while lines: line = lines.pop(0) # If line contains "NUMBER: " if "NUMBER: " in line: num = line.strip("NUMBERS: ") continue # Check if the line is matched as a table entry, and if it is not matched as a sentence. table_entry_match = table_entry_check.search(line) table = [] # If there is a match, keep matching until the table ends. while table_entry_match: table.append(re.split("\t|\s{2,}", line)) # If the table entry is the end of the file, lines will be an empty list. # Make sure we are not poping from an empty list. if lines: line = lines.pop(0) # Check if the line is matched as a table entry, and if it is not matched as a sentence. table_entry_match = table_entry_check.search(line) # Break the while loop, if there is no line anymore. else: break # If there is a group of at least two table entries, we add data to allData. # The reason that we have to store num and date is that there might be two # tables in a gcn. if len(table) >= 2: data["number"] = num data["table"] = list(map(lambda t: '\t'.join(t), table)) allData.append(data) data = {} # PHASE 2: Write the data into *_final_table.txt. file = open(f"{output_path}{grb}/{grb}_final_table.txt", 'w') prev_num = 0 for data in allData: # If the previous table and the current table is in the same gcn, # do not print out the header again. if prev_num == data['number']: result = "" else: result = "=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=\n\n" result += f"Number: {data['number']}\n" result += '\n'.join(data['table']) + '\n\n' prev_num = data['number'] file.write(result) # close the *_final_table.txt and remove the original *_table.txt. file.close() os.remove(f"{output_path}{grb}/{grb}_table.txt") return allData
[docs]def final_tables_to_csv(grb, output_path="output/"): pass