mirror of
https://github.com/kuhyx/WUT_Computer_Science.git
synced 2026-07-04 15:03:08 +02:00
quick fix
This commit is contained in:
parent
581cb90466
commit
56650422d8
@ -3,19 +3,22 @@ import processing
|
|||||||
import re
|
import re
|
||||||
import copy
|
import copy
|
||||||
|
|
||||||
# output file name
|
# input file name
|
||||||
file_path = "headlines_fixed_format.wa"
|
file_path = "alignments_unformatted_headlines.txt"
|
||||||
|
|
||||||
# paths to student answers database
|
# output file path
|
||||||
#chunked_path1 = "test_goldStandard/student/STSint.testinput.answers-students.sent1.chunk.txt"
|
output_file_path = "headlines_fixed_format.wa"
|
||||||
#chunked_path2 = "test_goldStandard/student/STSint.testinput.answers-students.sent2.chunk.txt"
|
|
||||||
#alignment_path = "test_goldStandard/student/STSint.testinput.answers-students.wa"
|
|
||||||
|
|
||||||
# paths to headlines database
|
# paths to headlines database
|
||||||
chunked_path1 = "test_goldStandard/headlines/STSint.testinput.headlines.sent1.chunk.txt"
|
chunked_path1 = "test_goldStandard/headlines/STSint.testinput.headlines.sent1.chunk.txt"
|
||||||
chunked_path2 = "test_goldStandard/headlines/STSint.testinput.headlines.sent1.chunk.txt"
|
chunked_path2 = "test_goldStandard/headlines/STSint.testinput.headlines.sent1.chunk.txt"
|
||||||
alignment_path = "test_goldStandard/headlines/STSint.testinput.headlines.wa"
|
alignment_path = "test_goldStandard/headlines/STSint.testinput.headlines.wa"
|
||||||
|
|
||||||
|
# paths to student answers database
|
||||||
|
#chunked_path1 = "test_goldStandard/student/STSint.testinput.answers-students.sent1.chunk.txt"
|
||||||
|
#chunked_path2 = "test_goldStandard/student/STSint.testinput.answers-students.sent2.chunk.txt"
|
||||||
|
#alignment_path = "test_goldStandard/student/STSint.testinput.answers-students.wa"
|
||||||
|
|
||||||
# load data
|
# load data
|
||||||
goldstandard_chunked = processing.load_chunked(chunked_path1, chunked_path2)
|
goldstandard_chunked = processing.load_chunked(chunked_path1, chunked_path2)
|
||||||
goldstandard_alignment = processing.load_alignment(alignment_path)
|
goldstandard_alignment = processing.load_alignment(alignment_path)
|
||||||
@ -23,8 +26,6 @@ goldstandard_alignment = processing.load_alignment(alignment_path)
|
|||||||
# get a nice table
|
# get a nice table
|
||||||
data = pd.merge(goldstandard_chunked, goldstandard_alignment, left_index=True, right_index=True)
|
data = pd.merge(goldstandard_chunked, goldstandard_alignment, left_index=True, right_index=True)
|
||||||
|
|
||||||
file_path = "alignments_unformatted_headlines.txt"
|
|
||||||
|
|
||||||
# open generated alignments
|
# open generated alignments
|
||||||
with open(file_path, 'r') as file:
|
with open(file_path, 'r') as file:
|
||||||
responses = [eval(line.strip()) for line in file.readlines()]
|
responses = [eval(line.strip()) for line in file.readlines()]
|
||||||
@ -141,7 +142,7 @@ for n, r in enumerate(responses):
|
|||||||
print("rejected indexes:")
|
print("rejected indexes:")
|
||||||
print(rejected_indexes)
|
print(rejected_indexes)
|
||||||
|
|
||||||
with open(file_path, 'w') as file:
|
with open(output_file_path, 'w') as file:
|
||||||
for i, r in zip(indexes, responses_final):
|
for i, r in zip(indexes, responses_final):
|
||||||
file.write("<sentence id=\"" + str(i+1) + "\" status=\"\">\n")
|
file.write("<sentence id=\"" + str(i+1) + "\" status=\"\">\n")
|
||||||
file.write("<alignment>\n")
|
file.write("<alignment>\n")
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user