mirror of
https://github.com/kuhyx/WUT_Computer_Science.git
synced 2026-07-04 13:03:05 +02:00
finishing touches
major changes to most of the ChatGPT processing pipeline
This commit is contained in:
parent
c2a77b2b31
commit
581cb90466
375
alignments_unformatted_headlines.txt
Normal file
375
alignments_unformatted_headlines.txt
Normal file
@ -0,0 +1,375 @@
|
||||
'China ==> China // EQUI // 5\ns Peace Ark ==> s Peace Ark // EQUI // 5\ndeparts ==> departs // EQUI // 5\nfor the Philippines ==> for the Philippines // EQUI // 5\nThursday ==> Thursday // EQUI // 5'
|
||||
'Conservatives <==> Conservatives // EQUI // 5\nsweep <==> sweep // EQUI // 5\nto Australia election victory <==> to Australia election victory // EQUI // 5'
|
||||
"Abe ==> Abe // EQUI // 5\n's shrine visit ==> 's shrine visit // EQUI // 5\ndraws wide condemnation criticism ==> draws wide condemnation criticism // EQUI // 5"
|
||||
'Ukraine <==> Ukraine // EQUI // 5\nrevokes <==> revokes // EQUI // 5\nantiprotest law <==> antiprotest law // EQUI // 5\n <==> // NOALI // 0\nPM <==> PM // EQUI // 5\nresigns <==> resigns // EQUI // 5'
|
||||
'Russia ==> Russia // EQUI // 5\nsays ==> says // EQUI // 5\ndetects ==> detects // EQUI // 5\nmissiles ==> missiles // EQUI // 5\nfired ==> fired // EQUI // 5\nfrom Mediterranean Sea ==> from Mediterranean Sea // EQUI // 5'
|
||||
'ProRussian protesters <==> ProRussian protesters // EQUI // 5\nstorm <==> storm // EQUI // 5\nnew police station <==> new police station // EQUI // 5\nin east Ukraine <==> in east Ukraine // EQUI // 5'
|
||||
'Cyclone <==> Cyclone // EQUI // 5\nleaves <==> leaves // EQUI // 5\ntrail <==> trail // EQUI // 5\nof destruction <==> of destruction // EQUI // 5\nin India <==> in India // EQUI // 5'
|
||||
'4 foreigners <==> 4 foreigners // EQUI // 5\namong 9 killed <==> among 9 killed // EQUI // 5\nin Kabul hotel attack <==> in Kabul hotel attack // EQUI // 5'
|
||||
'Many dead ==> Many dead // EQUI // 5\nas asylum boat ==> as asylum boat // EQUI // 5\nsinks ==> sinks // EQUI // 5\noff Australia ==> off Australia // EQUI // 5'
|
||||
'Israeli calls ==> Israeli calls // EQUI // 5\nto execute ==> to execute // EQUI // 5\nPalestinian captives ==> Palestinian captives // EQUI // 5\ninstead of releasing them ==> instead of releasing them // EQUI // 5'
|
||||
'DPRK leader <==> DPRK leader // EQUI // 5\ncalls <==> calls // EQUI // 5\nfor better ties <==> for better ties // EQUI // 5\nwith SKorea <==> with SKorea // EQUI // 5'
|
||||
'World <==> World // EQUI // 5\ns oldest person and oldest man <==> s oldest person and oldest man // EQUI // 5\never <==> ever // EQUI // 5\ndies <==> dies // EQUI // 5\nat 116 <==> at 116 // EQUI // 5'
|
||||
'World powers and Iran ==> World powers and Iran // EQUI // 5\nto resume ==> to resume // EQUI // 5\nexpert nuclear talks ==> expert nuclear talks // EQUI // 5\non Dec 30 ==> on Dec 30 // EQUI // 5'
|
||||
'Biden ==> Biden // EQUI // 5\nwarns ==> warns // EQUI // 5\n ==> // REL // 3\nits ==> its // EQUI // 5\ntime ==> time // EQUI // 5\nfor Russia ==> for Russia // EQUI // 5\nto stop ==> to stop // EQUI // 5\ntalking ==> talking // EQUI // 5\nand ==> and // EQUI // 5\nstart ==> start // EQUI // 5\nacting ==> acting // EQUI // 5\n ==> // REL // 3\nin Ukraine ==> in Ukraine // EQUI // 5'
|
||||
'India Ink <==> India Ink // EQUI // 5\nImage <==> Image // EQUI // 5\nof the Day <==> of the Day // EQUI // 5\nFebruary 25 <==> February 25 // EQUI // 5'
|
||||
'Philippines <==> Philippines // EQUI // 5\ndefers <==> defers // EQUI // 5\ncomment <==> comment // EQUI // 5\non Taiwan fisher attack <==> on Taiwan fisher attack // EQUI // 5'
|
||||
'New Zealand <==> New Zealand // EQUI // 5\nset <==> set // EQUI // 5\nto legalise <==> to legalise // EQUI // 5\ngay marriage <==> gay marriage // EQUI // 5'
|
||||
'Israel s exPM Ariel Sharon ==> Israel s exPM Ariel Sharon // EQUI // 5\ndies ==> dies // EQUI // 5'
|
||||
'Peace talks <==> Peace talks // EQUI // 5\nwith Palestinians <==> with Palestinians // EQUI // 5\nwould take <==> would take // EQUI // 5\nmonths <==> months // EQUI // 5\n <==> // NOALI // 0\nIsrael official <==> Israel official // EQUI // 5'
|
||||
"Ronaldo's hat trick <==> Ronaldo's hat trick // EQUI // 5\nsends <==> sends // EQUI // 5\nPortugal <==> Portugal // EQUI // 5\nto World Cup <==> to World Cup // EQUI // 5"
|
||||
'At Least ==> At Least // EQUI // 5\n22 Dead ==> 22 Dead // EQUI // 5\nAfter China Earthquake ==> After China Earthquake // EQUI // 5'
|
||||
'Canadian freight train blast death toll ==> Canadian freight train blast death toll // EQUI // 5\nhits ==> hits // EQUI // 5\nthree ==> three // EQUI // 5\n ==> // NOALI // 0\nto rise ==> to rise // EQUI // 5'
|
||||
'Five children <==> Five children // EQUI // 5\nburied <==> buried // EQUI // 5\nalive <==> alive // EQUI // 5\nin Bohol landslide <==> in Bohol landslide // EQUI // 5'
|
||||
'Rosberg <==> Rosberg // EQUI // 5\nemulates <==> emulates // EQUI // 5\nfather <==> father // EQUI // 5\nwith Monaco win <==> with Monaco win // EQUI // 5'
|
||||
'Suicide truck bomber <==> Suicide truck bomber // EQUI // 5\nkills <==> kills // EQUI // 5\n11 <==> 11 // EQUI // 5\nin northern Iraq <==> in northern Iraq // EQUI // 5'
|
||||
'Tropical Storm Barry ==> Tropical Storm Barry // EQUI // 5\nforms ==> forms // EQUI // 5\noff Mexico s coast ==> off Mexico s coast // EQUI // 5\nbringing ==> bringing // EQUI // 5\nheavy rain ==> heavy rain // EQUI // 5\nas it ==> as it // EQUI // 5\nheads ==> heads // EQUI // 5\ntoward Veracruz ==> toward Veracruz // EQUI // 5'
|
||||
'Syrian rebels <==> Syrian rebels // EQUI // 5\ntarget <==> target // EQUI // 5\nmilitary airports <==> military airports // EQUI // 5'
|
||||
'66magnitude quake ==> 66magnitude quake // EQUI // 5\nhits ==> hits // EQUI // 5\nSouth Atlantic Ocean ==> South Atlantic Ocean // EQUI // 5\nUSGS ==> USGS // EQUI // 5'
|
||||
'Poverty stats <==> Poverty stats // EQUI // 5\ndoubt <==> doubt // EQUI // 5\nModi <==> Modi // EQUI // 5\ns development claims <==> s development claims // EQUI // 5'
|
||||
'North Korea ==> North Korea // EQUI // 5\nblocks ==> blocks // EQUI // 5\naccess ==> access // EQUI // 5\nto joint industrial zone ==> to joint industrial zone // EQUI // 5'
|
||||
'Pakistan ==> Pakistan // EQUI // 5\ns Musharraf ==> s Musharraf // EQUI // 5\ncharged ==> charged // EQUI // 5\nover Bhutto murder ==> over Bhutto murder // EQUI // 5'
|
||||
'Ukrainian protesters <==> Ukrainian protesters // EQUI // 5\nback <==> back // EQUI // 5\nin streets <==> in streets // EQUI // 5\nfor antigovernment rally <==> for antigovernment rally // EQUI // 5'
|
||||
'Jamaican connection <==> Jamaican connection // EQUI // 5\nin Liverpool <==> in Liverpool // EQUI // 5\ns win <==> s win // EQUI // 5'
|
||||
'[ Rescuers ] <==> [ Rescuers ] // EQUI // 5\n[ scour ] <==> [ scour ] // EQUI // 5\n[ debris ] <==> [ debris ] // EQUI // 5\n[ after India buildings collapse ] <==> [ after India buildings collapse ] // EQUI // 5'
|
||||
'Rock bands <==> Rock bands // EQUI // 5\nplane <==> plane // EQUI // 5\nmakes <==> makes // EQUI // 5\nemergency landing <==> emergency landing // EQUI // 5\nin Ill <==> in Ill // EQUI // 5'
|
||||
'Boston bombing suspect ==> Boston bombing suspect // EQUI // 5\nremains ==> remains // EQUI // 5\nin hospital ==> in hospital // EQUI // 5'
|
||||
'PPP <==> PPP // EQUI // 5\ns Shehla Raza <==> s Shehla Raza // EQUI // 5\nelected <==> elected // EQUI // 5\nDeputy Speaker of SA <==> Deputy Speaker of SA // EQUI // 5'
|
||||
"[1142] <==> [1142] // EQUI // 5\n[Churchill] <==> [Churchill] // EQUI // 5\n['s last child] <==> ['s last child] // EQUI // 5\n[dies] <==> [dies] // EQUI // 5\n[aged 91] <==> [aged 91] // EQUI // 5"
|
||||
'N Korea ==> N Korea // EQUI // 5\ntestfires ==> testfires // EQUI // 5\n30 shortrange missiles ==> 30 shortrange missiles // EQUI // 5\ninto sea ==> into sea // EQUI // 5\n[empty] ==> [empty] // EQUI // 5\nYonhap ==> Yonhap // EQUI // 5'
|
||||
'[ Chinese shares ] <==> [ Chinese shares ] // EQUI // 5\n[ close ] <==> [ close ] // EQUI // 5\n[ higher ] <==> [ higher ] // EQUI // 5\n[ Tuesday ] <==> [ Tuesday ] // EQUI // 5'
|
||||
'North Korea ==> North Korea // EQUI // 5\nshuns ==> shuns // EQUI // 5\noffer ==> offer // EQUI // 5\nof talks ==> of talks // EQUI // 5'
|
||||
'In surprise visit ==> In surprise visit // EQUI // 5\nto Afghanistan ==> to Afghanistan // EQUI // 5\nObama ==> Obama // EQUI // 5\ntells ==> tells // EQUI // 5\ntroops ==> troops // EQUI // 5\nyou ==> you // EQUI // 5\ninspire ==> inspire // EQUI // 5\nme ==> me // EQUI // 5'
|
||||
'[US] <==> [US] // EQUI // 5\n[awaits] <==> [awaits] // EQUI // 5\n[high court judgment] <==> [high court judgment] // EQUI // 5\n[on gay marriage] <==> [on gay marriage] // EQUI // 5'
|
||||
'Sisi ==> Sisi // EQUI // 5\nsworn in ==> sworn in // EQUI // 5\nas Egypt ==> as Egypt // EQUI // 5\ns president ==> s president // EQUI // 5\n=> // NOALI // 0\ncool reception ==> cool reception // EQUI // 5\nfrom West ==> from West // EQUI // 5'
|
||||
'China <==> China // EQUI // 5\nto investigate <==> to investigate // EQUI // 5\nEU wine exports <==> EU wine exports // EQUI // 5'
|
||||
"Eurozone's unemployment ==> Eurozone's unemployment // EQUI // 5\nclimbs ==> climbs // EQUI // 5\nto 121 ==> to 121 // EQUI // 5\na fresh record high ==> a fresh record high // EQUI // 5"
|
||||
'South Korean President <==> South Korean President // EQUI // 5\nSorry <==> Sorry // EQUI // 5\nFor Ferry Response <==> For Ferry Response // EQUI // 5'
|
||||
'ProRussian rebels <==> ProRussian rebels // EQUI // 5\nshoot down <==> shoot down // EQUI // 5\nUkrainian plane <==> Ukrainian plane // EQUI // 5\n49 <==> 49 // EQUI // 5\nkilled <==> killed // EQUI // 5'
|
||||
'[Saudi] <==> [Saudi] // EQUI // 5\n[pushes] <==> [pushes] // EQUI // 5\n[UN resolution] <==> [UN resolution] // EQUI // 5\n[on Syria abuse] <==> [on Syria abuse] // EQUI // 5'
|
||||
'At least <==> At least // EQUI // 5\n45 <==> 45 // EQUI // 5\nkilled <==> killed // EQUI // 5\nin Spain train crash <==> in Spain train crash // EQUI // 5'
|
||||
'19 hurt ==> 19 hurt // EQUI // 5\nin New Orleans shooting ==> in New Orleans shooting // EQUI // 5'
|
||||
'Vatican Embassy <==> Vatican Embassy // EQUI // 5\nin Syria <==> in Syria // EQUI // 5\nhit <==> hit // EQUI // 5\nby mortar fire <==> by mortar fire // EQUI // 5'
|
||||
'The Note <==> The Note // EQUI // 5\ns MustReads <==> s MustReads // EQUI // 5\nfor Friday <==> for Friday // EQUI // 5\nMay 24 2013 <==> May 24 2013 // EQUI // 5'
|
||||
'Nato ==> Nato // EQUI // 5\ncondemns ==> condemns // EQUI // 5\nUkraine crackdown ==> Ukraine crackdown // EQUI // 5'
|
||||
'Israel s Peres ==> Israel s Peres // EQUI // 5\nurges ==> urges // EQUI // 5\nreturn ==> return // EQUI // 5\nto peace talks ==> to peace talks // EQUI // 5'
|
||||
'Assault on Iraq Funeral <==> Assault on Iraq Funeral // EQUI // 5\nOther Attacks <==> Other Attacks // EQUI // 5\nKill 92 <==> Kill 92 // EQUI // 5'
|
||||
'Two NATO soldiers <==> Two NATO soldiers // EQUI // 5\nkilled <==> killed // EQUI // 5\nin Afghanistan <==> in Afghanistan // EQUI // 5'
|
||||
'US top diplomat Kerry ==> US top diplomat Kerry // EQUI // 5\ns wife ==> s wife // EQUI // 5\nrushed ==> rushed // EQUI // 5\nto hospital ==> to hospital // EQUI // 5'
|
||||
'Egypt ==> Egypt // EQUI // 5\ns Brotherhood ==> s Brotherhood // EQUI // 5\nto hold ==> to hold // EQUI // 5\nmarch of anger ==> march of anger // EQUI // 5'
|
||||
'Israel and Hamas ==> Israel and Hamas // EQUI // 5\nto observe ==> to observe // EQUI // 5\nbrief Gaza truce ==> brief Gaza truce // EQUI // 5'
|
||||
'Foreign exchange rates <==> Foreign exchange rates // EQUI // 5\nin Singapore <==> in Singapore // EQUI // 5'
|
||||
'[World] <==> [World] // EQUI // 5\n[s oldest man] <==> [s oldest man] // EQUI // 5\n[dies] <==> [dies] // EQUI // 5\n[at 116] <==> [at 116] // EQUI // 5'
|
||||
'Germany ==> Germany // EQUI // 5\nwelcomes ==> welcomes // EQUI // 5\nIran nuclear agreement ==> Iran nuclear agreement // EQUI // 5'
|
||||
'South Korea <==> South Korea // EQUI // 5\nproposes <==> proposes // EQUI // 5\ntalks <==> talks // EQUI // 5\nwith North <==> with North // EQUI // 5'
|
||||
'[ Video ] <==> [ Video ] // EQUI // 5\n[ shows ] <==> [ shows ] // EQUI // 5\n[ Kenya jihadis ] <==> [ Kenya jihadis ] // EQUI // 5\n[ casually ] <==> [ casually ] // EQUI // 5\n[ killing ] <==> [ killing ] // EQUI // 5\n[ in mall ] <==> [ in mall ] // EQUI // 5'
|
||||
'UN inspectors ==> UN inspectors // EQUI // 5\nhail ==> hail // EQUI // 5\nprogress ==> progress // EQUI // 5\nover Syria chemical weapons ==> over Syria chemical weapons // EQUI // 5'
|
||||
'Iran ==> Iran // EQUI // 5\nrejects ==> rejects // EQUI // 5\nclaim ==> claim // EQUI // 5\non chlorine bombs ==> on chlorine bombs // EQUI // 5'
|
||||
'One dead ==> One dead // EQUI // 5\nin US building collapse ==> in US building collapse // EQUI // 5'
|
||||
'Fallen US firefighters <==> Fallen US firefighters // EQUI // 5\nbrought home <==> brought home // EQUI // 5'
|
||||
'Ship <==> Ship // EQUI // 5\nhunting for <==> hunting for // EQUI // 5\nmore pings <==> more pings // EQUI // 5\nin missing Malaysian plane search <==> in missing Malaysian plane search // EQUI // 5'
|
||||
'TehranLondon ties <==> TehranLondon ties // EQUI // 5\nto improve <==> to improve // EQUI // 5\nAfkham <==> Afkham // EQUI // 5'
|
||||
'Mandela <==> Mandela // EQUI // 5\nresponding <==> responding // EQUI // 5\nto treatment <==> to treatment // EQUI // 5\n[ ] <==> [ ] // NOALI // 0'
|
||||
'Gunmen <==> Gunmen // EQUI // 5\nassassinate <==> assassinate // EQUI // 5\nLibyan deputy industry minister <==> Libyan deputy industry minister // EQUI // 5'
|
||||
'China yuan <==> China yuan // EQUI // 5\nweakens <==> weakens // EQUI // 5\nto 61535 <==> to 61535 // EQUI // 5\nagainst USD <==> against USD // EQUI // 5'
|
||||
'Indonesians <==> Indonesians // EQUI // 5\nburn <==> burn // EQUI // 5\nAustralian flags <==> Australian flags // EQUI // 5\nover spying reports <==> over spying reports // EQUI // 5'
|
||||
'More Deaths ==> More Deaths // EQUI // 5\nExpected ==> Expected // EQUI // 5\nin Canada Oil Train Explosion ==> in Canada Oil Train Explosion // EQUI // 5'
|
||||
'Egypt court ==> Egypt court // EQUI // 5\nsentences ==> sentences // EQUI // 5\n10 Brotherhood supporters ==> 10 Brotherhood supporters // EQUI // 5\nto death ==> to death // EQUI // 5\nsources ==> sources // EQUI // 5'
|
||||
'Declines <==> Declines // EQUI // 5\nin US stock market <==> in US stock market // EQUI // 5\nmoderate <==> moderate // EQUI // 5'
|
||||
'10 Things <==> 10 Things // EQUI // 5\nto Know <==> to Know // EQUI // 5\nThis Week <==> This Week // EQUI // 5\ns Takeaways <==> s Takeaways // EQUI // 5\n <==> // NOALI // 0'
|
||||
'Ukraine protest leaders ==> Ukraine protest leaders // EQUI // 5\nname ==> name // EQUI // 5\nministers ==> ministers // EQUI // 5\nRussian troops ==> Russian troops // EQUI // 5\non alert ==> on alert // EQUI // 5'
|
||||
'US <==> US // EQUI // 5\nRussia <==> Russia // EQUI // 5\nenter <==> enter // EQUI // 5\nthird day <==> third day // EQUI // 5\nof Syria talks <==> of Syria talks // EQUI // 5'
|
||||
'US factory ==> US factory // EQUI // 5\norders ==> orders // EQUI // 5\ndown ==> down // EQUI // 5\n07 pct ==> 07 pct // EQUI // 5\nin January ==> in January // EQUI // 5'
|
||||
'San Diego Mayor Bob Filner <==> San Diego Mayor Bob Filner // EQUI // 5\nSan Diego Mayor Bob Filner <==> accusers // REL // 0\ns <==> NOALI // NOALI // 0'
|
||||
'Nine Dead <==> Nine Dead // EQUI // 5\nIn Oklahoma City <==> In Oklahoma City // EQUI // 5\nAfter Tornadoes <==> After Tornadoes // EQUI // 5\nHit <==> Hit // EQUI // 5'
|
||||
'[ Kenyan forces ] <==> [ Kenyan forces ] // EQUI // 5\n[ caused ] <==> [ caused ] // EQUI // 5\n[ mall collapse ] <==> [ mall collapse ] // EQUI // 5'
|
||||
'Ukraine ==> Ukraine // EQUI // 5\nto implement ==> to implement // EQUI // 5\nunilateral ceasefire ==> unilateral ceasefire // EQUI // 5'
|
||||
'Muslim Brotherhood <==> Muslim Brotherhood // EQUI // 5\nbanned <==> banned // EQUI // 5\nin Egypt <==> in Egypt // EQUI // 5'
|
||||
'Suspect <==> Suspect // EQUI // 5\narrested <==> arrested // EQUI // 5\nin attack <==> in attack // EQUI // 5\non French soldier <==> on French soldier // EQUI // 5'
|
||||
'Obama ==> Obama // EQUI // 5\nUrges ==> Urges // EQUI // 5\nRestraint ==> Restraint // EQUI // 5\nfrom Israelis Palestinians ==> from Israelis Palestinians // EQUI // 5'
|
||||
'Brazilians <==> Brazilians // EQUI // 5\nthrong streets <==> throng streets // EQUI // 5\nto greet <==> to greet // EQUI // 5\npope <==> pope // EQUI // 5'
|
||||
'Quake ==> Quake // EQUI // 5\nshakes ==> shakes // EQUI // 5\nnortheastern Japan ==> northeastern Japan // EQUI // 5\n[ ] ==> [ ] // EQUI // 5\nno tsunami risk ==> no tsunami risk // EQUI // 5'
|
||||
'Detroit ==> Detroit // EQUI // 5\neligible ==> eligible // EQUI // 5\nfor bankruptcy protection ==> for bankruptcy protection // EQUI // 5\nUS judge ==> US judge // EQUI // 5'
|
||||
'Vietnam <==> Vietnam // EQUI // 5\ndetects <==> detects // EQUI // 5\nsignals <==> signals // EQUI // 5\nfrom missing plane <==> from missing plane // EQUI // 5'
|
||||
'Six US soldiers <==> Six US soldiers // EQUI // 5\nkilled <==> killed // EQUI // 5\nin aircraft crash <==> in aircraft crash // EQUI // 5\nin Afghanistan <==> in Afghanistan // EQUI // 5'
|
||||
'[ EU ] [ EU ] // EQUI // 5\n[ offers ] [ offers ] // EQUI // 5\n[ to help ] [ to help ] // EQUI // 5\n[ destroy ] [ destroy ] // EQUI // 5\n[ Syria weapons ] [ Syria weapons ] // EQUI // 5'
|
||||
"Egypt arrests <==> Egypt arrests // EQUI // 5\nMuslim Brotherhood's top leader <==> Muslim Brotherhood's top leader // EQUI // 5"
|
||||
'Arsenal <==> Arsenal // EQUI // 5\nv <==> v // EQUI // 5\nStoke City <==> Stoke City // EQUI // 5\nlive <==> live // EQUI // 5'
|
||||
'London Marathon runners ==> London Marathon runners // EQUI // 5\nhonour ==> honour // EQUI // 5\nBoston victims ==> Boston victims // EQUI // 5'
|
||||
'Today ==> Today // EQUI // 5\nin History ==> in History // EQUI // 5\nDec 2 ==> Dec 2 // EQUI // 5'
|
||||
'Desmond Tutu ==> Desmond Tutu // EQUI // 5\nsays ==> says // EQUI // 5\nhe ==> he // EQUI // 5\nwill attend ==> will attend // EQUI // 5\nMandela funeral ==> Mandela funeral // EQUI // 5'
|
||||
'Nigerian gunmen ==> Nigerian gunmen // EQUI // 5\nkill ==> kill // EQUI // 5\ngroom 30 others ==> groom 30 others // EQUI // 5\nin wedding convoy ==> in wedding convoy // EQUI // 5'
|
||||
'[Deaths] <==> [Deaths] // EQUI // 5\n[rise] <==> [rise] // EQUI // 5\n[in Israeli air strikes] <==> [in Israeli air strikes] // EQUI // 5\n[on Gaza] <==> [on Gaza] // EQUI // 5'
|
||||
'US drone strike ==> US drone strike // EQUI // 5\nkills ==> kills // EQUI // 5\n11 ==> 11 // EQUI // 5\nin Pakistan ==> in Pakistan // EQUI // 5'
|
||||
'[ Islamic militants ] <==> [ Islamic militants ] // EQUI // 5\n[ kill ] <==> [ kill ] // EQUI // 5\n[ 9 foreign tourists 1 Pakistani ] <==> [ 9 foreign tourists 1 Pakistani ] // EQUI // 5'
|
||||
'Boeing 787 Dreamliner <==> Boeing 787 Dreamliner // EQUI // 5\nfire shuts <==> fire shuts // EQUI // 5\nHeathrow <==> Heathrow // EQUI // 5'
|
||||
'Former Pakistan President Pervez Musharraf <==> Former Pakistan President Pervez Musharraf // EQUI // 5\narrested <==> arrested // EQUI // 5\nagain <==> again // EQUI // 5'
|
||||
'Latest storm ==> Latest storm // EQUI // 5\nbrings ==> brings // EQUI // 5\nmore snow ==> more snow // EQUI // 5\nto US East Coast ==> to US East Coast // EQUI // 5'
|
||||
'Powerful 76 quake <==> Powerful 76 quake // EQUI // 5\nstrikes <==> strikes // EQUI // 5\noff Solomons <==> off Solomons // EQUI // 5'
|
||||
'World ==> World // EQUI // 5\nsends ==> sends // EQUI // 5\nemergency relief ==> emergency relief // EQUI // 5\nto battered Philippines ==> to battered Philippines // EQUI // 5'
|
||||
'EU Britain <==> EU Britain // EQUI // 5\nargue <==> argue // EQUI // 5\nover Juncker appointment <==> over Juncker appointment // EQUI // 5'
|
||||
'Dozens ==> Dozens // EQUI // 5\ninjured ==> injured // EQUI // 5\nin Connecticut train collision ==> in Connecticut train collision // EQUI // 5'
|
||||
'Russian Prime Minister <==> Russian Prime Minister // EQUI // 5\nOn Second Day <==> On Second Day // EQUI // 5\nOf Crimea Visit <==> Of Crimea Visit // EQUI // 5'
|
||||
'Barack Obama ==> Barack Obama // EQUI // 5\ncancels ==> cancels // EQUI // 5\nmeeting ==> meeting // EQUI // 5\nwith Vladimir Putin ==> with Vladimir Putin // EQUI // 5\nover Edward Snowden ==> over Edward Snowden // EQUI // 5'
|
||||
'India govt ==> India govt // EQUI // 5\nrejects ==> rejects // EQUI // 5\nproposal ==> proposal // EQUI // 5\nto shut ==> to shut // EQUI // 5\npetrol pumps ==> petrol pumps // EQUI // 5\nat night ==> at night // EQUI // 5'
|
||||
'Israeli war jet strikes ==> Israeli war jet strikes // EQUI // 5\non northern Gaza ==> on northern Gaza // EQUI // 5'
|
||||
'Tropical Storm Karen ==> Tropical Storm Karen // EQUI // 5\ntargets ==> targets // EQUI // 5\nUS Gulf Coast ==> US Gulf Coast // EQUI // 5'
|
||||
'Toyota Honda Nissan and Mazda <==> Toyota Honda Nissan and Mazda // EQUI // 5\nrecall <==> recall // EQUI // 5\n3 million Japanese cars <==> 3 million Japanese cars // EQUI // 5\nafter airbag fault <==> after airbag fault // EQUI // 5'
|
||||
'China ==> China // EQUI // 5\nsends ==> sends // EQUI // 5\nlargest fleet ==> largest fleet // EQUI // 5\nyet ==> yet // EQUI // 5\nto disputed islands ==> to disputed islands // EQUI // 5'
|
||||
'California governor <==> California governor // EQUI // 5\ndeclares <==> declares // EQUI // 5\ndrought emergency <==> drought emergency // EQUI // 5'
|
||||
'Ukraine rebels <==> Ukraine rebels // EQUI // 5\nflee <==> flee // EQUI // 5\neastern stronghold <==> eastern stronghold // EQUI // 5'
|
||||
'France ==> France // EQUI // 5\nwarns ==> warns // EQUI // 5\nof extremists ==> of extremists // EQUI // 5\nbenefiting ==> benefiting // EQUI // 5\nfrom Egypt violence ==> from Egypt violence // EQUI // 5'
|
||||
'Obama ==> Obama // EQUI // 5\nmoves ==> moves // EQUI // 5\nto fill ==> to fill // EQUI // 5\nFed board ==> Fed board // EQUI // 5\ntaps ==> taps // EQUI // 5\nFischer ==> Fischer // EQUI // 5\nto be ==> to be // EQUI // 5\nNo 2 ==> No 2 // EQUI // 5'
|
||||
'[42] [killed] <==> [42] [killed] // EQUI // 5\n[] <==> [] // NOALI // 0\n[63] [wounded] <==> [63] [wounded] // EQUI // 5\n[in violence] <==> [in violence] // EQUI // 5\n[in Iraq] <==> [in Iraq] // EQUI // 5'
|
||||
'US soldier ==> US soldier // EQUI // 5\nfree ==> free // EQUI // 5\nafter almost five years captivity ==> after almost five years captivity // EQUI // 5\nin Afghanistan ==> in Afghanistan // EQUI // 5'
|
||||
'[ Thousands ] <==> [ Thousands ] // EQUI // 5\n[ of Britons ] <==> [ of Britons ] // EQUI // 5\n[ claim ] <==> [ claim ] // EQUI // 5\n[ dole ] <==> [ dole ] // EQUI // 5\n[ in Germany ] <==> [ in Germany ] // EQUI // 5'
|
||||
'Bradley Manning <==> Bradley Manning // EQUI // 5\nacquitted <==> acquitted // EQUI // 5\nof aiding <==> of aiding // EQUI // 5\nthe enemy <==> the enemy // EQUI // 5'
|
||||
'Brazil ==> Brazil // EQUI // 5\nBuzzing ==> Buzzing // EQUI // 5\nWith Anticipation ==> With Anticipation // EQUI // 5\nfor Pope Francis ==> for Pope Francis // EQUI // 5'
|
||||
'Drone Strikes <==> Drone Strikes // EQUI // 5\na Seminary <==> a Seminary // EQUI // 5\nin Pakistan <==> in Pakistan // EQUI // 5'
|
||||
'Gunmen <==> Gunmen // EQUI // 5\nkill <==> kill // EQUI // 5\n10 foreign tourists <==> 10 foreign tourists // EQUI // 5\nin northern Pakistan <==> in northern Pakistan // EQUI // 5'
|
||||
'[ Death toll ] == [ Death toll ] // EQUI // 5\n[ in Nairobi attack ] == [ in Nairobi attack ] // EQUI // 5\n[ rises ] == [ rises ] // EQUI // 5\n[ to 59 ] == [ to 59 ] // EQUI // 5'
|
||||
'Syrian regime <==> Syrian regime // EQUI // 5\nusing <==> using // EQUI // 5\nchemical weapons <==> chemical weapons // EQUI // 5\non a small scale <==> on a small scale // EQUI // 5'
|
||||
'Venezuelan opposition leader <==> Venezuelan opposition leader // EQUI // 5\nsought <==> sought // EQUI // 5'
|
||||
'[ UK ] <==> [ UK ] // EQUI // 5\n[ s Cameron ] <==> [ s Cameron ] // EQUI // 5\n[ seriously ] <==> [ seriously ] // EQUI // 5\n[ concerned ] <==> [ concerned ] // EQUI // 5\n[ by GibraltarSpain row ] <==> [ by GibraltarSpain row ] // EQUI // 5'
|
||||
'What ==> What // EQUI // 5\nthe Papers ==> the Papers // EQUI // 5\nSay ==> Say // EQUI // 5\nNov 19 2013 ==> Nov 19 2013 // EQUI // 5'
|
||||
'Top Diplomats ==> Top Diplomats // EQUI // 5\nMeet ==> Meet // EQUI // 5\nin Munich ==> in Munich // EQUI // 5\nat Critical Time ==> at Critical Time // EQUI // 5'
|
||||
'Iranian nuclear talks ==> Iranian nuclear talks // EQUI // 5\nto be extended ==> to be extended // EQUI // 5\nto November ==> to November // EQUI // 5'
|
||||
'Syrian Opposition ==> Syrian Opposition // EQUI // 5\nMulls ==> Mulls // EQUI // 5\nParticipation ==> Participation // EQUI // 5\nin Peace Talks ==> in Peace Talks // EQUI // 5'
|
||||
'Rudd ==> Rudd // EQUI // 5\nsworn in ==> sworn in // EQUI // 5\nas Australian PM ==> as Australian PM // EQUI // 5'
|
||||
'Obama ==> Obama // EQUI // 5\nsays ==> says // EQUI // 5\nKorean War veterans ==> Korean War veterans // EQUI // 5\ndeserved ==> deserved // EQUI // 5\nbetter ==> better // EQUI // 5'
|
||||
'Andhra Pradesh high court ==> Andhra Pradesh high court // EQUI // 5\nasks ==> asks // EQUI // 5\nTelugu actors Mohan Babu Brahmanandam ==> Telugu actors Mohan Babu Brahmanandam // EQUI // 5\nto return ==> to return // EQUI // 5\nPadma Shri ==> Padma Shri // EQUI // 5'
|
||||
'Pro football player Aaron Hernandez ==> Pro football player Aaron Hernandez // EQUI // 5\ncharged ==> charged // EQUI // 5\nwith murder ==> with murder // EQUI // 5\nin friend ==> in friend // EQUI // 5\ns shooting death ==> s shooting death // EQUI // 5'
|
||||
'Algerian president <==> Algerian president // EQUI // 5\nin France <==> in France // EQUI // 5\nfor medical tests <==> for medical tests // EQUI // 5'
|
||||
'Israel ==> Israel // EQUI // 5\nto release ==> to release // EQUI // 5\nPalestinian prisoners ==> Palestinian prisoners // EQUI // 5'
|
||||
'British NobelWinning Author Doris Lessing <==> British NobelWinning Author Doris Lessing // EQUI // 5\nDies <==> Dies // EQUI // 5'
|
||||
'Carter ==> Carter // EQUI // 5\nstars ==> stars // EQUI // 5\nfor Crusaders ==> for Crusaders // EQUI // 5'
|
||||
'Car bombs ==> Car bombs // EQUI // 5\nkill ==> kill // EQUI // 5\nat least ==> at least // EQUI // 5\n16 ==> 16 // EQUI // 5\nin Iraqi capital ==> in Iraqi capital // EQUI // 5'
|
||||
'Fast amp Furious star Paul Walker ==> Fast amp Furious star Paul Walker // EQUI // 5\ndies ==> dies // EQUI // 5\nin car crash ==> in car crash // EQUI // 5\nat 40 ==> at 40 // EQUI // 5'
|
||||
'China ==> China // EQUI // 5\neases ==> eases // EQUI // 5\nonechild policy ==> onechild policy // EQUI // 5'
|
||||
'Global chemical watchdog <==> Global chemical watchdog // EQUI // 5\nwins <==> wins // EQUI // 5\nNobel Peace Prize <==> Nobel Peace Prize // EQUI // 5'
|
||||
'[ Woman ] <==> [ Woman ] // EQUI // 5\n[ remanded ] <==> [ remanded ] // EQUI // 5\n[ over child ] <==> [ over child ] // EQUI // 5\n[ s death ] <==> [ s death ] // EQUI // 5\n[ A woman ] <==> [ A woman ] // EQUI // 5\n[ has been remanded ] <==> [ has been remanded ] // EQUI // 5\n[ in custody ] <==> [ in custody ] // EQUI // 5\n[ charged ] <==> [ charged ] // EQUI // 5\n[ with murder ] <==> [ with murder ] // EQUI // 5\n[ after an eightyearold girl ] <==> [ after an eightyearold girl ] // EQUI // 5\n[ was found ] <==> [ was found ] // EQUI // 5\n[ dead ] <==> [ dead ] // EQUI // 5\n[ in a block ] <==> [ in a block ] // EQUI // 5\n[ of flats ] <==> [ of flats ] // EQUI // 5'
|
||||
'Police ==> Police // EQUI // 5\nfire ==> fire // EQUI // 5\ntear gas ==> tear gas // EQUI // 5\nat proMorsy protesters ==> at proMorsy protesters // EQUI // 5'
|
||||
'[ 1702 ] <==> [ 1702 ] // EQUI // 5\n[ 23 ] <==> [ 23 ] // EQUI // 5\n[ killed ] <==> [ killed ] // EQUI // 5\n[ in Iraq car bombings ] <==> [ in Iraq car bombings ] // EQUI // 5'
|
||||
'Gunmen <==> Gunmen // EQUI // 5\nkill <==> kill // EQUI // 5\nnine foreign tourists guide <==> nine foreign tourists guide // EQUI // 5\nin Pakistan <==> in Pakistan // EQUI // 5'
|
||||
'ProRussia rebels ==> ProRussia rebels // EQUI // 5\nto hold ==> to hold // EQUI // 5\nvote ==> vote // EQUI // 5\nin Ukraine ==> in Ukraine // EQUI // 5'
|
||||
'Erdogan <==> Erdogan // EQUI // 5\ncriticizes <==> criticizes // EQUI // 5\ncourt ruling <==> court ruling // EQUI // 5\nafter lifting <==> after lifting // EQUI // 5\nTwitter ban <==> Twitter ban // EQUI // 5'
|
||||
'Today <==> Today // EQUI // 5\nin History <==> in History // EQUI // 5\nJan 24 <==> Jan 24 // EQUI // 5'
|
||||
'Janet Yellen ==> Janet Yellen // EQUI // 5\nto become ==> to become // EQUI // 5\nfirst woman ==> first woman // EQUI // 5\nto lead the Federal Reserve ==> to lead the Federal Reserve // EQUI // 5'
|
||||
'Police ==> Police // EQUI // 5\nuse ==> use // EQUI // 5\nwater cannon ==> water cannon // EQUI // 5\nas India gangrape protest ==> as India gangrape protest // EQUI // 5\nturns ==> turns // EQUI // 5\nviolent ==> violent // EQUI // 5'
|
||||
'US consumers ==> US consumers // EQUI // 5\nboost ==> boost // EQUI // 5\nspending ==> spending // EQUI // 5 \n02 pct ==> 02 pct // EQUI // 5\nlast month ==> last month // EQUI // 5'
|
||||
"Iran <==> Iran // EQUI // 5\n's Rouhani <==> 's Rouhani // EQUI // 5\nadmits <==> admits // EQUI // 5\nchemical weapons <==> chemical weapons // EQUI // 5\nkilled <==> killed // EQUI // 5\npeople <==> people // EQUI // 5\nin Syria <==> in Syria // EQUI // 5"
|
||||
'Britain <==> Britain // EQUI // 5\ns Prince Philip <==> s Prince Philip // EQUI // 5\nset for <==> set for // EQUI // 5\nexploratory surgery <==> exploratory surgery // EQUI // 5'
|
||||
'Huge explosion <==> Huge explosion // EQUI // 5\nafter oil train <==> after oil train // EQUI // 5\nderails <==> derails // EQUI // 5\nin Canada <==> in Canada // EQUI // 5'
|
||||
'NATO soldier <==> NATO soldier // EQUI // 5\nkilled <==> killed // EQUI // 5\nin Taliban attack <==> in Taliban attack // EQUI // 5'
|
||||
'[Red Cross workers] <==> [Red Cross workers] // EQUI // 5\n[released] <==> [released] // EQUI // 5\n[in Ukraine] <==> [in Ukraine] // EQUI // 5'
|
||||
'Iran ==> Iran // EQUI // 5\ns nuclear negotiating team ==> s nuclear negotiating team // EQUI // 5\nleaves ==> leaves // EQUI // 5\nfor Kazakhstan ==> for Kazakhstan // EQUI // 5'
|
||||
'Myanmar ==> Myanmar // EQUI // 5\narrests ==> arrests // EQUI // 5\n44 ==> 44 // EQUI // 5\nover sectarian violence ==> over sectarian violence // EQUI // 5'
|
||||
'Google ==> Google // EQUI // 5\nUnveils ==> Unveils // EQUI // 5\nPrototype SelfDriving Car ==> Prototype SelfDriving Car // EQUI // 5'
|
||||
'Israel ==> Israel // EQUI // 5\nto free ==> to free // EQUI // 5\nPalestinian prisoners ==> Palestinian prisoners // EQUI // 5\nover Kerry talks ==> over Kerry talks // EQUI // 5'
|
||||
'[Chemical Arms Team] <==> [Chemical Arms Team] // EQUI // 5\n[to Begin Talks] <==> [to Begin Talks] // EQUI // 5\n[With Syria] <==> [With Syria] // EQUI // 5'
|
||||
'[ Egypt ] <==> [ Egypt ] // EQUI // 5\n[ s opposition ] <==> [ s opposition ] // EQUI // 5\n[ criticizes ] <==> [ criticizes ] // EQUI // 5\n[ president s speech ] <==> [ president s speech ] // EQUI // 5'
|
||||
'Rogers ==> Rogers // EQUI // 5\nhires ==> hires // EQUI // 5\nVodafone UK chief Guy Laurence ==> Vodafone UK chief Guy Laurence // EQUI // 5\nas CEO ==> as CEO // EQUI // 5'
|
||||
'One dead <==> One dead // EQUI // 5\n77 hurt <==> 77 hurt // EQUI // 5\nin riot <==> in riot // EQUI // 5\nat PNG immigration centre <==> at PNG immigration centre // EQUI // 5'
|
||||
'15 dead <==> 15 dead // EQUI // 5\nas Egypt police <==> as Egypt police // EQUI // 5\ndisperse <==> disperse // EQUI // 5\nproMursi camps <==> proMursi camps // EQUI // 5'
|
||||
'Syrian PM <==> Syrian PM // EQUI // 5\nSurvives <==> Survives // EQUI // 5\nBomb Attack <==> Bomb Attack // EQUI // 5'
|
||||
'North Korea ==> North Korea // EQUI // 5\nclaims ==> claims // EQUI // 5\nbreakthrough guided missile ==> breakthrough guided missile // EQUI // 5'
|
||||
'India Ink <==> India Ink // EQUI // 5\nImage <==> Image // EQUI // 5\nof the Day <==> of the Day // EQUI // 5\nFebruary 25 <==> February 25 // EQUI // 5'
|
||||
'Bangladesh collapse search <==> Bangladesh collapse search // EQUI // 5\nover <==> over // EQUI // 5\n(Empty) <==> (Empty) // REL // 5\ndeath toll <==> death toll // EQUI // 5\n1127 <==> 1127 // EQUI // 5'
|
||||
'Japan ==> Japan // EQUI // 5\ns opposition party chief ==> s opposition party chief // EQUI // 5\ncriticizes ==> criticizes // EQUI // 5\nAbe ==> Abe // EQUI // 5\ns economic policies ==> s economic policies // EQUI // 5'
|
||||
'Abdullah <==> Abdullah // EQUI // 5\nahead <==> ahead // EQUI // 5\nin Afghanistan election race <==> in Afghanistan election race // EQUI // 5'
|
||||
'New major earthquake <==> New major earthquake // EQUI // 5\nrocks <==> rocks // EQUI // 5\nsouthwest Pakistan <==> southwest Pakistan // EQUI // 5'
|
||||
'Berlusconi party ministers <==> Berlusconi party ministers // EQUI // 5\nannouncement <==> announcement // EQUI // 5\nof resignation <==> of resignation // EQUI // 5\nthrows <==> throws // EQUI // 5\nItaly <==> Italy // EQUI // 5\ninto political crisis <==> into political crisis // EQUI // 5'
|
||||
'Nicaragua <==> Nicaragua // EQUI // 5\nVenezuela <==> Venezuela // EQUI // 5\noffer <==> offer // EQUI // 5\nasylum <==> asylum // EQUI // 5\nto Snowden <==> to Snowden // EQUI // 5'
|
||||
'17 govt employees <==> 17 govt employees // EQUI // 5\nkilled <==> killed // EQUI // 5\nin Pakistan bus bombing <==> in Pakistan bus bombing // EQUI // 5'
|
||||
'Two hurt ==> Two hurt // EQUI // 5\nin Bangkok shooting ==> in Bangkok shooting // EQUI // 5'
|
||||
'Israel colonel <==> Israel colonel // EQUI // 5\nkilled <==> killed // EQUI // 5\nin Jordan Valley <==> in Jordan Valley // EQUI // 5'
|
||||
'Bangladesh police <==> Bangladesh police // EQUI // 5\narrest <==> arrest // EQUI // 5\nprominent atheist blogger <==> prominent atheist blogger // EQUI // 5'
|
||||
'Cannes hotel raid <==> Cannes hotel raid // EQUI // 5\nnets <==> nets // EQUI // 5\n40 million gems <==> 40 million gems // EQUI // 5'
|
||||
'Chinese News Agency ==> Chinese News Agency // EQUI // 5\nSlams ==> Slams // EQUI // 5\nDelays ==> Delays // EQUI // 5\nin Search for Jet ==> in Search for Jet // EQUI // 5'
|
||||
'Syria envoy ==> Syria envoy // EQUI // 5\nsays ==> says // EQUI // 5\nno peace talks ==> no peace talks // EQUI // 5\nwithout opposition ==> without opposition // EQUI // 5'
|
||||
'Mel Smith ==> Mel Smith // EQUI // 5\ndead ==> dead // EQUI // 5\nat 60 ==> at 60 // EQUI // 5\nand ==> and // EQUI // 5\nUK ==> UK // EQUI // 5\nmourns ==> mourns // EQUI // 5\na comedy favourite ==> a comedy favourite // EQUI // 5'
|
||||
'Driver of crashed Spanish train <==> Driver of crashed Spanish train // EQUI // 5\nheld <==> held // EQUI // 5\non suspicion <==> on suspicion // EQUI // 5\nof negligent homicide <==> of negligent homicide // EQUI // 5'
|
||||
'Iran ==> Iran // EQUI // 5\nwarns ==> warns // EQUI // 5\nagainst military intervention ==> against military intervention // EQUI // 5\nin Iraq ==> in Iraq // EQUI // 5'
|
||||
'Death toll ==> Death toll // EQUI // 5\nrises ==> rises // EQUI // 5\nto 51 ==> to 51 // EQUI // 5\nin Latvian supermarket collapse ==> in Latvian supermarket collapse // EQUI // 5'
|
||||
'Defence ==> Defence // EQUI // 5\nin South Korea ferry trial ==> in South Korea ferry trial // EQUI // 5\nwarns ==> warns // EQUI // 5\nof scapegoating ==> of scapegoating // EQUI // 5'
|
||||
'EDL leaders <==> EDL leaders // EQUI // 5\narrested <==> arrested // EQUI // 5\nduring march <==> during march // EQUI // 5\nto Woolwich <==> to Woolwich // EQUI // 5'
|
||||
'Gunmen <==> Gunmen // EQUI // 5\nkill <==> kill // EQUI // 5\nnine foreign tourists guide <==> nine foreign tourists guide // EQUI // 5\nin Pakistan <==> in Pakistan // EQUI // 5'
|
||||
'Thai opposition party <==> Thai opposition party // EQUI // 5\nto boycott <==> to boycott // EQUI // 5\ngeneral election <==> general election // EQUI // 5'
|
||||
'Syrian warplanes <==> Syrian warplanes // EQUI // 5\nreportedly <==> reportedly // EQUI // 5\nstrike <==> strike // EQUI // 5\nin Iraq <==> in Iraq // EQUI // 5\nNOALI <==> NOALI // NOALI // 5\nkilling <==> killing // EQUI // 5\n57 <==> 57 // EQUI // 5'
|
||||
'Egypt ==> Egypt // EQUI // 5\nchanges ==> changes // EQUI // 5\nvenue ==> venue // EQUI // 5\nfor trial ==> for trial // EQUI // 5\nof ousted president ==> of ousted president // EQUI // 5'
|
||||
'Respected British interviewer David Frost ==> Respected British interviewer David Frost // EQUI // 5\ndies ==> dies // EQUI // 5\nof a heart attack ==> of a heart attack // EQUI // 5'
|
||||
'[ Group ] <==> [ Group ] // EQUI // 5\n[ 7 Palestinians ] <==> [ 7 Palestinians ] // EQUI // 5\n[ killed ] <==> [ killed ] // EQUI // 5\n[ in Syria ] <==> [ in Syria ] // EQUI // 5'
|
||||
'Car bomb ==> Car bomb // EQUI // 5\nat Swedish consulate ==> at Swedish consulate // EQUI // 5\nin Libya ==> in Libya // EQUI // 5\ns Benghazi ==> s Benghazi // EQUI // 5\nno casualties ==> no casualties // EQUI // 5'
|
||||
'US drone strike ==> US drone strike // EQUI // 5\nkills ==> kills // EQUI // 5\nsix ==> six // EQUI // 5\nin Pakistan ==> in Pakistan // EQUI // 5\nofficials ==> officials // EQUI // 5'
|
||||
'Earthquake <==> Earthquake // EQUI // 5\nof 63 magnitude <==> of 63 magnitude // EQUI // 5\nstrikes <==> strikes // EQUI // 5\noff Indonesia island <==> off Indonesia island // EQUI // 5'
|
||||
'[China quake] <==> [China quake] // EQUI // 5\n[leaves] <==> [leaves] // EQUI // 5\n[dozens] <==> [dozens] // EQUI // 5\n[dead] <==> [dead] // EQUI // 5'
|
||||
'Queen ==> Queen // EQUI // 5\nvisits ==> visits // EQUI // 5\nBritain ==> Britain // EQUI // 5\ns new prince ==> s new prince // EQUI // 5'
|
||||
'[Syria opposition] <==> [Syria opposition] // EQUI // 5\n[pressed] <==> [pressed] // EQUI // 5\n[to attend] <==> [to attend] // EQUI // 5\n[talks] <==> [talks] // EQUI // 5'
|
||||
'[Putin] <==> [Putin] // EQUI // 5\n[Submits] <==> [submits] // EQUI // 5\n[federal constitutional law] <==> [federal constitutional law] // EQUI // 5\n[on Crimea] <==> [on Crimea] // EQUI // 5\n[s integration] <==> [s integration] // EQUI // 5\n[into Russia] <==> [into Russia] // EQUI // 5\n[to State Duma] <==> [to State Duma] // EQUI // 5'
|
||||
'Venezuela <==> Venezuela // EQUI // 5\nexpels <==> expels // EQUI // 5\nthree US diplomats <==> three US diplomats // EQUI // 5'
|
||||
'Egypt s bruised Islamists ==> Egypt s bruised Islamists // EQUI // 5\nprotest ==> protest // EQUI // 5\nafter bloody week ==> after bloody week // EQUI // 5'
|
||||
'N Korea <==> N Korea // EQUI // 5\ntestfires <==> testfires // EQUI // 5\n30 shortrange missiles <==> 30 shortrange missiles // EQUI // 5'
|
||||
'Glasgow helicopter death toll ==> Glasgow helicopter death toll // EQUI // 5\nrises ==> rises // EQUI // 5\nto nine ==> to nine // EQUI // 5'
|
||||
'Shanghai stock indices <==> Shanghai stock indices // EQUI // 5\nclose <==> close // EQUI // 5\nmixed <==> mixed // EQUI // 5\nNov 11 <==> Nov 11 // EQUI // 5'
|
||||
'Prince Charles ==> Prince Charles // EQUI // 5\ncompares ==> compares // EQUI // 5\nPutin ==> Putin // EQUI // 5\nto Hitler ==> to Hitler // EQUI // 5'
|
||||
'Syrian army <==> Syrian army // EQUI // 5\nretakes <==> retakes // EQUI // 5\nCrusader castle <==> Crusader castle // EQUI // 5\nfrom rebels <==> from rebels // EQUI // 5'
|
||||
'Syrian opposition <=> Syrian opposition // EQUI // 5\nto meet <=> to meet // EQUI // 5\nover peace talks <=> over peace talks // EQUI // 5'
|
||||
'Four Morsi supporters <==> Four Morsi supporters // EQUI // 5\nkilled <==> killed // EQUI // 5\nin Egypt clashes <==> in Egypt clashes // EQUI // 5'
|
||||
'Five people <==> Five people // EQUI // 5\njailed <==> jailed // EQUI // 5\nover Costa Concordia disaster <==> over Costa Concordia disaster // EQUI // 5'
|
||||
'Tokyo <==> Tokyo // EQUI // 5\nto host <==> to host // EQUI // 5\n2020 Olympic Games <==> 2020 Olympic Games // EQUI // 5'
|
||||
'Suicide bomber ==> Suicide bomber // EQUI // 5\nkills ==> kills // EQUI // 5\neight ==> eight // EQUI // 5\nwounds ==> wounds // EQUI // 5\n13 ==> 13 // EQUI // 5\nin Afghan capital ==> in Afghan capital // EQUI // 5\npolice ==> police // EQUI // 5'
|
||||
'Twelve <==> Twelve // EQUI // 5\nkilled <==> killed // EQUI // 5\nin bomb blast <==> in bomb blast // EQUI // 5\non Pakistani train <==> on Pakistani train // EQUI // 5'
|
||||
'[ 41 ] <==> [ 41 ] // EQUI // 5\n[ killed ] <==> [ killed ] // EQUI // 5\n[ ] <==> [ ] // EQUI // 5\n[ 22 ] <==> [ 22 ] // EQUI // 5\n[ wounded ] <==> [ wounded ] // EQUI // 5\n[ in violent attacks ] <==> [ in violent attacks ] // EQUI // 5\n[ in Iraq ] <==> [ in Iraq ] // EQUI // 5'
|
||||
'China yuan ==> China yuan // EQUI // 5\nadvances ==> advances // EQUI // 5\nto 62035 ==> to 62035 // EQUI // 5\nagainst USD ==> against USD // EQUI // 5\nTuesday ==> Tuesday // EQUI // 5'
|
||||
'New Australian PM Tony Abbott ==> New Australian PM Tony Abbott // EQUI // 5\non his bike ==> on his bike // EQUI // 5\nawaiting ==> awaiting // EQUI // 5\nfinal vote count ==> final vote count // EQUI // 5'
|
||||
'Amazon ==> Amazon // EQUI // 5\nlaunches ==> launches // EQUI // 5\nnew device for streaming video ==> new device for streaming video // EQUI // 5'
|
||||
'Glasgow Helicopter Crash Search <==> Glasgow Helicopter Crash Search // EQUI // 5\nEnds <==> Ends // EQUI // 5'
|
||||
'[ Thousands ] <==> [ Thousands ] // EQUI // 5\n[ evacuated ] <==> [ evacuated ] // EQUI // 5\n[ in Moscow metro fire ] <==> [ in Moscow metro fire ] // EQUI // 5'
|
||||
'Bangladesh opposition ==> Bangladesh opposition // EQUI // 5\nset ==> set // EQUI // 5\nfor mass march ==> for mass march // EQUI // 5\nagainst polls ==> against polls // EQUI // 5'
|
||||
'[40] <==> [40] // EQUI // 5\n[still] <==> [still] // EQUI // 5\n[missing] <==> [missing] // EQUI // 5\n[in Canadian train wreck] <==> [in Canadian train wreck] // EQUI // 5'
|
||||
'[Arsenal stars] <==> [Arsenal stars] // EQUI // 5\n[poised] <==> [poised] // EQUI // 5\n[to wear] <==> [to wear] // EQUI // 5\n[Tshirts] <==> [Tshirts] // EQUI // 5\n[in support] <==> [in support] // EQUI // 5\n[Pat Rice] <==> [Pat Rice] // EQUI // 5\n[s fight] <==> [s fight] // EQUI // 5\n[against cancer] <==> [against cancer] // EQUI // 5'
|
||||
'Colombia <==> Colombia // EQUI // 5\nstrike <==> strike // EQUI // 5\nthree times <==> three times // EQUI // 5'
|
||||
'Polar bear DNA <==> Polar bear DNA // EQUI // 5\nmay help <==> may help // EQUI // 5\nfight <==> fight // EQUI // 5\nobesity <==> obesity // EQUI // 5'
|
||||
'Fire in Russian psychiatric hospital ==> Fire in Russian psychiatric hospital // EQUI // 5\nkills ==> kills // EQUI // 5\n38 ==> 38 // EQUI // 5'
|
||||
'World ==> World // EQUI // 5\ns largest building ==> s largest building // EQUI // 5\nopened ==> opened // EQUI // 5\nin west China ==> in west China // EQUI // 5'
|
||||
'Israel ==> Israel // EQUI // 5\nnot to release ==> not to release // EQUI // 5\nPalestinian prisoners ==> Palestinian prisoners // EQUI // 5'
|
||||
'Volcano erupts <==> Volcano erupts // EQUI // 5\nin Indonesia <==> in Indonesia // EQUI // 5\n100000 <==> 100000 // EQUI // 5\nevacuated <==> evacuated // EQUI // 5\nand <==> and // EQUI // 5\nairports <==> airports // EQUI // 5'
|
||||
'[Search] <==> [Search] // EQUI // 5\n[resumes] <==> [resumes] // EQUI // 5\n[for Malaysia jet] <==> [for Malaysia jet] // EQUI // 5'
|
||||
'China yuan ==> China yuan // EQUI // 5\nweakens ==> weakens // EQUI // 5\nto 61651 ==> to 61651 // EQUI // 5\nagainst USD ==> against USD // EQUI // 5\nTuesday ==> Tuesday // EQUI // 5'
|
||||
'South Africa <==> South Africa // EQUI // 5\nunites <==> unites // EQUI // 5\nin prayer and song <==> in prayer and song // EQUI // 5\nfor Mandela <==> for Mandela // EQUI // 5'
|
||||
'Prince William <=> Prince William // EQUI // 5\naddresses <=> addresses // EQUI // 5\nAustralian parliament video <=> Australian parliament video // EQUI // 5'
|
||||
'Fromer Pakistani army chief Musharraf ==> Fromer Pakistani army chief Musharraf // EQUI // 5\ncharged ==> charged // EQUI // 5\nin 2007 Bhutto assassination ==> in 2007 Bhutto assassination // EQUI // 5'
|
||||
'5 Dead ==> 5 Dead // EQUI // 5\nin India Building Collapse ==> in India Building Collapse // EQUI // 5'
|
||||
'Egypt ==> Egypt // EQUI // 5\nLaunches ==> Launches // EQUI // 5\nOffensive ==> Offensive // EQUI // 5\nAgainst Sinai Militants ==> Against Sinai Militants // EQUI // 5'
|
||||
'Assad <==> Assad // EQUI // 5\nwarns <==> warns // EQUI // 5\nTurkey <==> Turkey // EQUI // 5\nof heavy price <==> of heavy price // EQUI // 5\nfor backing <==> for backing // EQUI // 5\nSyrian rebels <==> Syrian rebels // EQUI // 5'
|
||||
'Deal on Iran nuclear drive <==> Deal on Iran nuclear drive // EQUI // 5\n from Jan 20 <==> from Jan 20 // EQUI // 5'
|
||||
'Two arrests <==> Two arrests // EQUI // 5\nmade <==> made // EQUI // 5\nafter protest <==> after protest // EQUI // 5\nagainst Nigel Farage <==> against Nigel Farage // EQUI // 5'
|
||||
'Nobel author Doris Lessing <==> Nobel author Doris Lessing // EQUI // 5\ndies <==> dies // EQUI // 5\nat 94 <==> at 94 // EQUI // 5'
|
||||
'Man <==> Man // EQUI // 5\nheld <==> held // EQUI // 5\nafter teen shot <==> after teen shot // EQUI // 5\nin Belfast <==> in Belfast // EQUI // 5'
|
||||
'Iran ==> Iran // EQUI // 5\nups ==> ups // EQUI // 5\noil swap ==> oil swap // EQUI // 5\nwith eastern neighbors ==> with eastern neighbors // EQUI // 5'
|
||||
"Pakistan ==> Pakistan // EQUI // 5\n's Musharraf ==> 's Musharraf // EQUI // 5\nUnder Arrest ==> Under Arrest // EQUI // 5\nin Bhutto Assassination ==> in Bhutto Assassination // EQUI // 5"
|
||||
'Pit Bull Dog ==> Pit Bull Dog // EQUI // 5\nKills ==> Kills // EQUI // 5\nBaby Girl ==> Baby Girl // EQUI // 5\nIn Blackburn ==> In Blackburn // EQUI // 5'
|
||||
'Police ==> Police // EQUI // 5\nstop ==> stop // EQUI // 5\nMaldives presidential election ==> Maldives presidential election // EQUI // 5'
|
||||
'10 Things <==> 10 Things // EQUI // 5\nto Know <==> to Know // EQUI // 5\nToday <==> Today // EQUI // 5'
|
||||
'[ Air strike ] <==> [ Air strike ] // EQUI // 5\n[ kills ] <==> [ kills ] // EQUI // 5\n[ one man ] <==> [ one man ] // EQUI // 5\n[ in Syria ] <==> [ in Syria ] // EQUI // 5\n[ s Hama ] <==> [ s Hama ] // EQUI // 5'
|
||||
'One unaccounted for <==> One unaccounted for // EQUI // 5\nin San Francisco plane crash <==> in San Francisco plane crash // EQUI // 5'
|
||||
'China ==> China // EQUI // 5\nurges ==> urges // EQUI // 5\nJapan ==> Japan // EQUI // 5\nto respect ==> to respect // EQUI // 5\nregional security concerns ==> regional security concerns // EQUI // 5'
|
||||
'Israel Forces ==> Israel Forces // EQUI // 5\nKill ==> Kill // EQUI // 5\n2 Palestinian Militants ==> 2 Palestinian Militants // EQUI // 5'
|
||||
'Ukraine talks <==> Ukraine talks // EQUI // 5\ncontinue <==> continue // EQUI // 5\namid growing unrest <==> amid growing unrest // EQUI // 5'
|
||||
'Egyptian police ==> Egyptian police // EQUI // 5\nfire ==> fire // EQUI // 5\ntear gas ==> tear gas // EQUI // 5\nat protesters ==> at protesters // EQUI // 5\nin Cairo ==> in Cairo // EQUI // 5'
|
||||
'Hong Kong <==> Hong Kong // EQUI // 5\nset <==> set // EQUI // 5\nfor democracy march <==> for democracy march // EQUI // 5'
|
||||
'Brazil ==> Brazil // EQUI // 5\nin talks ==> in talks // EQUI // 5\nto hire ==> to hire // EQUI // 5\n6000 Cuban doctors ==> 6000 Cuban doctors // EQUI // 5'
|
||||
'[ Southern Italy Coach Crash ] <==> [ Southern Italy Coach Crash ] // EQUI // 5\n[ Kills ] <==> [ Kills ] // EQUI // 5\n[ 38 ] <==> [ 38 ] // EQUI // 5'
|
||||
'Yemen officials <==> Yemen officials // EQUI // 5 \nUS drone <==> US drone // EQUI // 5 \nkills <==> kills // EQUI // 5 \n2 militants <==> 2 militants // EQUI // 5'
|
||||
'[Nine] <==> [Nine] // EQUI // 5\n[killed] <==> [killed] // EQUI // 5\n[more than 40] <==> [more than 40] // EQUI // 5\n[injured] <==> [injured] // EQUI // 5\n[in multiple bus crash in Germany] <==> [in multiple bus crash in Germany] // EQUI // 5'
|
||||
'Australian PM Kevin Rudd <==> Australian PM Kevin Rudd // EQUI // 5\ncalls <==> calls // EQUI // 5\nelection for 7 September <==> election for 7 September // EQUI // 5'
|
||||
'Police <==> Police // EQUI // 5\nclash <==> clash // EQUI // 5\nwith youth <==> with youth // EQUI // 5\nin Cairo <==> in Cairo // EQUI // 5\nafter antiMorsi protest <==> after antiMorsi protest // EQUI // 5'
|
||||
'[US] <==> [US] // EQUI // 5\n[Issues] <==> [Issues] // EQUI // 5\n[Travel Alert] <==> [Travel Alert] // EQUI // 5\n[Over Al Qaeda Threat] <==> [Over Al Qaeda Threat] // EQUI // 5'
|
||||
'[ Ships ] <==> [ Ships ] // EQUI // 5\n[ Train ] <==> [ Train ] // EQUI // 5\n[ for Syrian Chemical Weapons Transport ] <==> [ for Syrian Chemical Weapons Transport ] // EQUI // 5'
|
||||
'More rain <==> More rain // EQUI // 5\nhampers <==> hampers // EQUI // 5\nIndian flood rescue <==> Indian flood rescue // EQUI // 5'
|
||||
'Aust shares <==> Aust shares // EQUI // 5\nedge <==> edge // EQUI // 5\nhigher <==> higher // EQUI // 5'
|
||||
'US Russia <==> US Russia // EQUI // 5\nagree on <==> agree on // EQUI // 5\nseizure <==> seizure // EQUI // 5\nof Syria chemical weapons <==> of Syria chemical weapons // EQUI // 5'
|
||||
'[Fast amp Furious star Paul Walker] <==> [Fast amp Furious star Paul Walker] // EQUI // 5\n[dies] <==> [dies] // EQUI // 5\n[in car crash] <==> [in car crash] // EQUI // 5'
|
||||
'European Central Bank <==> European Central Bank // EQUI // 5\nkeeps <==> keeps // EQUI // 5\nrates <==> rates // EQUI // 5\nunchanged <==> unchanged // EQUI // 5'
|
||||
'Gunmen <==> Gunmen // EQUI // 5\nkill <==> kill // EQUI // 5\n10 <==> 10 // EQUI // 5\nin Philippine political violence <==> in Philippine political violence // EQUI // 5'
|
||||
'[ 24 ] <==> [ 24 ] // EQUI // 5\n[ killed ] <==> [ killed ] // EQUI // 5\n[ 77 ] <==> [ 77 ] // EQUI // 5\n[ wounded ] <==> [ wounded ] // EQUI // 5\n[ in Iraq mosque bombing ] <==> [ in Iraq mosque bombing ] // EQUI // 5'
|
||||
'Islamists <==> Islamists // EQUI // 5\nstill <==> still // EQUI // 5\nholding <==> holding // EQUI // 5\nsome 30 hostages <==> some 30 hostages // EQUI // 5\nas Kenya mall standoff <==> as Kenya mall standoff // EQUI // 5\ncontinues <==> continues // EQUI // 5'
|
||||
'Heavy smog ==> Heavy smog // EQUI // 5\nhits ==> hits // EQUI // 5\nnorth China city ==> north China city // EQUI // 5\nflights ==> flights // EQUI // 5\ncancelled ==> cancelled // EQUI // 5\nNOALI ==> // NOALI // 0'
|
||||
'Algeria president ==> Algeria president // EQUI // 5\ngets ==> gets // EQUI // 5\ntherapy ==> therapy // EQUI // 5\nafter stroke ==> after stroke // EQUI // 5\n ==> // NOALI // \nstate media ==> state media // EQUI // 5'
|
||||
'Iran ==> Iran // EQUI // 5\nsays ==> says // EQUI // 5\nready ==> ready // EQUI // 5\nfor fresh round ==> for fresh round // EQUI // 5\nof nuclear talks ==> of nuclear talks // EQUI // 5'
|
||||
'Suicide bomber <==> Suicide bomber // EQUI // 5\nkills <==> kills // EQUI // 5\n23 Iraqi army recruits <==> 23 Iraqi army recruits // EQUI // 5'
|
||||
'Obama ==> Obama // EQUI // 5\nnominates ==> nominates // EQUI // 5\nnew transportation secretary ==> new transportation secretary // EQUI // 5'
|
||||
'Super typhoon <==> Super typhoon // EQUI // 5\nhits <==> hits // EQUI // 5\nPhilippines <==> Philippines // EQUI // 5'
|
||||
'Google <==> Google // EQUI // 5\nreleases <==> releases // EQUI // 5\nNexus 5 phone <==> Nexus 5 phone // EQUI // 5\nwith Kit Kat <==> with Kit Kat // EQUI // 5'
|
||||
'Judge ==> Judge // EQUI // 5\nOKs ==> OKs // EQUI // 5\nforcefeeding California inmates ==> forcefeeding California inmates // EQUI // 5'
|
||||
'Ride operator ==> Ride operator // EQUI // 5\ncharged ==> charged // EQUI // 5\nin NC fair accident ==> in NC fair accident // EQUI // 5'
|
||||
'US NATO ==> US NATO // EQUI // 5\nRaise ==> Raise // EQUI // 5\nDoubts ==> Doubts // EQUI // 5\nRussia Pulled Back ==> Russia Pulled Back // EQUI // 5\nfrom Ukraine Border ==> from Ukraine Border // EQUI // 5'
|
||||
'Early vote count in Iran ==> Early vote count in Iran // EQUI // 5\ngives ==> gives // EQUI // 5\nreformer ==> reformer // EQUI // 5\nwide lead ==> wide lead // EQUI // 5'
|
||||
'Australia ==> Australia // EQUI // 5\nreturns ==> returns // EQUI // 5\nasylum seekers ==> asylum seekers // EQUI // 5\nto Sri Lanka ==> to Sri Lanka // EQUI // 5\nin sea transfer ==> in sea transfer // EQUI // 5'
|
||||
'Lebanon Clashes ==> Lebanon Clashes // EQUI // 5\nRage ==> Rage // EQUI // 5\nOvernight ==> Overnight // EQUI // 5\nNOALI ==> // NOALI // \n12 Soldiers ==> 12 Soldiers // EQUI // 5\nDead ==> Dead // EQUI // 5'
|
||||
'Los Angeles airport worker <==> Los Angeles airport worker // EQUI // 5\ncharged <==> charged // EQUI // 5\nwith making 911 threats <==> with making 911 threats // EQUI // 5'
|
||||
'[2] killed <==> [2] killed // EQUI // 5\n[] <==> [] // NOALI // 0\nmultiple <==> multiple // EQUI // 5\nwounded <==> wounded // EQUI // 5\nin Miami shooting <==> in Miami shooting // EQUI // 5'
|
||||
'[ 9 ] <==> [ 9 ] // EQUI // 5\n[ Killed ] <==> [ Killed ] // EQUI // 5\n[ in Attack ] <==> [ in Attack ] // EQUI // 5\n[ on Indian Consulate ] <==> [ on Indian Consulate ] // EQUI // 5\n[ in Afghanistan ] <==> [ in Afghanistan ] // EQUI // 5'
|
||||
'6 Killed <==> 6 Killed // EQUI // 5\nby Hot Lava <==> by Hot Lava // EQUI // 5\nas Indonesia Volcano Erupts <==> as Indonesia Volcano Erupts // EQUI // 5'
|
||||
'China yuan <==> China yuan // EQUI // 5\nweakens <==> weakens // EQUI // 5\nto 61059 <==> to 61059 // EQUI // 5\nagainst USD <==> against USD // EQUI // 5'
|
||||
'Ukraine <==> Ukraine // EQUI // 5\nhosts <==> hosts // EQUI // 5\nunity talks <==> unity talks // EQUI // 5'
|
||||
'Iraqi Forces <==> Iraqi Forces // EQUI // 5\nMilitants <==> Militants // EQUI // 5\nBattle <==> Battle // EQUI // 5\nfor Oil Refinery <==> for Oil Refinery // EQUI // 5'
|
||||
'Cameron <==> Cameron // EQUI // 5 \nhas <==> has // EQUI // 5 \nno concerns <==> no concerns // EQUI // 5 \nover halal meat <==> over halal meat // EQUI // 5'
|
||||
'Bangladesh factory owners ==> Bangladesh factory owners // EQUI // 5\narrested ==> arrested // EQUI // 5\nas death toll ==> as death toll // EQUI // 5\nnears ==> nears // EQUI // 5\n350 ==> 350 // EQUI // 5'
|
||||
'EU foreign ministers <==> EU foreign ministers // EQUI // 5\nto discuss <==> to discuss // EQUI // 5\nhow <==> how // EQUI // 5\nto press <==> to press // EQUI // 5\nEgypt <==> Egypt // EQUI // 5\nover bloodshed <==> over bloodshed // EQUI // 5'
|
||||
'Cairo protesters <==> Cairo protesters // EQUI // 5\ndefy <==> defy // EQUI // 5\ncrackdown threat <==> crackdown threat // EQUI // 5'
|
||||
'Six ==> Six // EQUI // 5\nkilled ==> killed // EQUI // 5\nin drone strike ==> in drone strike // EQUI // 5\nin N Waziristan ==> in N Waziristan // EQUI // 5'
|
||||
'[ UN ] <==> [ UN ] // EQUI // 5\n[ launches ] <==> [ launches ] // EQUI // 5\n[ US300 mil appeal ] <==> [ US300 mil appeal ] // EQUI // 5\n[ for Philippines ] <==> [ for Philippines ] // EQUI // 5'
|
||||
'N Korea <==> N Korea // EQUI // 5\npostpones <==> postpones // EQUI // 5\nfamily reunions <==> family reunions // EQUI // 5\nwith South <==> with South // EQUI // 5'
|
||||
'N Korea ==> N Korea // EQUI // 5\ns Kim ==> s Kim // EQUI // 5\nhails ==> hails // EQUI // 5\nexecution ==> execution // EQUI // 5\nof powerful uncle ==> of powerful uncle // EQUI // 5'
|
||||
'Pregnant Sudan woman ==> Pregnant Sudan woman // EQUI // 5\nwho ==> who // EQUI // 5\nmarried ==> married // EQUI // 5\na Christian man ==> a Christian man // EQUI // 5\nsentenced ==> sentenced // EQUI // 5\nto death ==> to death // EQUI // 5\nfor apostasy ==> for apostasy // EQUI // 5'
|
||||
'Libya <==> Libya // EQUI // 5\nasks <==> asks // EQUI // 5\nUN Security Council <==> UN Security Council // EQUI // 5\nfor help <==> for help // EQUI // 5'
|
||||
'[ Death toll ] <==> [ Death toll ] // EQUI // 5\n[ from Baghdad bombs ] <==> [ from Baghdad bombs ] // EQUI // 5\n[ rises ] <==> [ rises ] // EQUI // 5\n[ to 19 ] <==> [ to 19 ] // EQUI // 5'
|
||||
'France ==> France // EQUI // 5\nSays ==> Says // EQUI // 5\nWont Shirk ==> Wont Shirk // EQUI // 5\nResponsibilities ==> Responsibilities // EQUI // 5\nin Syria ==> in Syria // EQUI // 5'
|
||||
'Boeing 777 plane <==> Boeing 777 plane // EQUI // 5\ncrashlands <==> crashlands // EQUI // 5\nat San Francisco airport <==> at San Francisco airport // EQUI // 5'
|
||||
'Tymoshenko ==> Tymoshenko // EQUI // 5\nurges ==> urges // EQUI // 5\nEU ==> EU // EQUI // 5\nto take ==> to take // EQUI // 5\nstrong action ==> strong action // EQUI // 5\non Crimea ==> on Crimea // EQUI // 5'
|
||||
'Monty Python <==> Monty Python // EQUI // 5\nto reunite <==> to reunite // EQUI // 5\nfor new show <==> for new show // EQUI // 5'
|
||||
'Record-setting Southwest heat wave <==> Record-setting Southwest heat wave // EQUI // 5\nRecord-setting Southwest heat wave <==> turns // NOALI // 0\nRecord-setting Southwest heat wave <==> fatal // NOALI // 0\nturns <==> Record-setting Southwest heat wave // NOALI // 0\nturns <==> turns // EQUI // 5\nturns <==> fatal // NOALI // 0\nfatal <==> Record-setting Southwest heat wave // NOALI // 0\nfatal <==> turns // NOALI // 0\nfatal <==> fatal // EQUI // 5'
|
||||
'3 dead <==> 3 dead // EQUI // 5\n1 <==> 1 // EQUI // 5\nhurt <==> hurt // EQUI // 5\nin shooting <==> in shooting // EQUI // 5\nat S Carolina oceanfront motel <==> at S Carolina oceanfront motel // EQUI // 5'
|
||||
'Nigerian leader <==> Nigerian leader // EQUI // 5\norders <==> orders // EQUI // 5\npush <==> push // EQUI // 5\nto free <==> to free // EQUI // 5\nabducted schoolgirls <==> abducted schoolgirls // EQUI // 5'
|
||||
'Boston bomb suspect <==> Boston bomb suspect // EQUI // 5\ncharged <==> charged // EQUI // 5\nreligious motive <==> religious motive // EQUI // 5\nseen <==> seen // EQUI // 5\n<==> // NOALI // 0'
|
||||
'[Scores] ==> [Scores] // EQUI // 5\n[Killed] ==> [Killed] // EQUI // 5\n[In Egyptian Protests] ==> [In Egyptian Protests] // EQUI // 5'
|
||||
'Taiwan stocks <==> Taiwan stocks // EQUI // 5\nclose <==> close // EQUI // 5\n056 pct <==> 056 pct // EQUI // 5\nhigher <==> higher // EQUI // 5'
|
||||
'Cypriot Finance Minister Michalis Sarris <==> Cypriot Finance Minister Michalis Sarris // EQUI // 5\nresigns <==> resigns // EQUI // 5'
|
||||
'Egypt turmoil <==> Egypt turmoil // EQUI // 5\ndeepens <==> deepens // EQUI // 5\nmilitants <==> militants // EQUI // 5\nkill <==> kill // EQUI // 5\n25 policemen <==> 25 policemen // EQUI // 5'
|
||||
'Berlusconi ==> Berlusconi // EQUI // 5\nsaid ==> said // EQUI // 5\nto pay off ==> to pay off // EQUI // 5\nbunga bunga witnesses ==> bunga bunga witnesses // EQUI // 5'
|
||||
'Microsoft => Microsoft // EQUI // 5\nfinally => finally // EQUI // 5\nunveils => unveils // EQUI // 5\nOffice => Office // EQUI // 5\nfor Apple => for Apple // EQUI // 5\ns iPad => s iPad // EQUI // 5'
|
||||
'Glee star Cory Monteith <==> Glee star Cory Monteith // EQUI // 5\nfound <==> found // EQUI // 5\ndead <==> dead // EQUI // 5\nin hotel room <==> in hotel room // EQUI // 5\nin Canada <==> in Canada // EQUI // 5'
|
||||
'Russia ==> Russia // EQUI // 5\npraises ==> praises // EQUI // 5\nidea ==> idea // EQUI // 5\nto render ==> to render // EQUI // 5\nrights assistance ==> rights assistance // EQUI // 5\nto Ukraine ==> to Ukraine // EQUI // 5'
|
||||
'New Orleans police ==> New Orleans police // EQUI // 5\narrest ==> arrest // EQUI // 5 \nsecond suspect ==> second suspect // EQUI // 5\nin Mother ==> in Mother // EQUI // 5\ns Day parade shooting ==> s Day parade shooting // EQUI // 5'
|
||||
'[Protesters] <==> [Protesters] // EQUI // 5\n[swamp] <==> [swamp] // EQUI // 5\n[streets] <==> [streets] // EQUI // 5\n[to reclaim] <==> [to reclaim] // EQUI // 5\n[revolution] <==> [revolution] // EQUI // 5'
|
||||
'Hong Kong stocks <==> Hong Kong stocks // EQUI // 5\nend <==> end // EQUI // 5\nhigher <==> higher // EQUI // 5'
|
||||
'[Poll] <==> [Poll] // EQUI // 5\n[Cory Booker] <==> [Cory Booker] // EQUI // 5\n[takes] <==> [takes] // EQUI // 5\n[early lead] <==> [early lead] // EQUI // 5\n[for NJ Senate] <==> [for NJ Senate] // EQUI // 5'
|
||||
'Rebels ==> Rebels // EQUI // 5\nlikely ==> likely // EQUI // 5\ndowned ==> downed // EQUI // 5\nMalaysian jet ==> Malaysian jet // EQUI // 5\nby mistake ==> by mistake // EQUI // 5\nUS officials ==> US officials // EQUI // 5'
|
||||
'[ Wave ] <==> [ Wave ] // EQUI // 5\n[ of bombings shootings ] <==> [ of bombings shootings ] // EQUI // 5\n[ kill ] <==> [ kill ] // EQUI // 5\n[ 38 ] <==> [ 38 ] // EQUI // 5\n[ in Iraq ] <==> [ in Iraq ] // EQUI // 5'
|
||||
'Didier Reynders ==> Didier Reynders // EQUI // 5\non Syria ==> on Syria // EQUI // 5'
|
||||
'[ 5 things ] <==> [ 5 things ] // EQUI // 5\n[ you ] <==> [ you ] // EQUI // 5\n[ need ] <==> [ need ] // EQUI // 5\n[ to know ] <==> [ to know ] // EQUI // 5\n[ Tuesday ] <==> [ Tuesday ] // EQUI // 5'
|
||||
"Egyptian court ==> Egyptian court // EQUI // 5\nconsiders ==> considers // EQUI // 5\nMubarak's release ==> Mubarak's release // EQUI // 5"
|
||||
'[ UN Security Council ] <==> [ UN Security Council ] // EQUI // 5\n[ calls ] <==> [ calls ] // EQUI // 5\n[ for humanitarian ceasefire ] <==> [ for humanitarian ceasefire ] // EQUI // 5\n[ in Gaza ] <==> [ in Gaza ] // EQUI // 5'
|
||||
'Pakistani Taliban chief Hakimullah Mehsud ==> Pakistani Taliban chief Hakimullah Mehsud // EQUI // 5\nkilled ==> killed // EQUI // 5\nin drone strike ==> in drone strike // EQUI // 5'
|
||||
'Blaze <==> Blaze // EQUI // 5\nThat <==> That // EQUI // 5\nKilled <==> Killed // EQUI // 5\n19 Firefighters <==> 19 Firefighters // EQUI // 5\nNow <==> Now // EQUI // 5\nContained <==> Contained // EQUI // 5'
|
||||
'Canada ==> Canada // EQUI // 5\napproves ==> approves // EQUI // 5\noil pipeline ==> oil pipeline // EQUI // 5\nto the Pacific Coast ==> to the Pacific Coast // EQUI // 5'
|
||||
'Man United ==> Man United // EQUI // 5\nSays ==> Says // EQUI // 5\nMoyes ==> Moyes // EQUI // 5\nLeaves ==> Leaves // EQUI // 5\nas Manager ==> as Manager // EQUI // 5'
|
||||
'Syria opposition unity <==> Syria opposition unity // EQUI // 5\ntake <==> take // EQUI // 5\nface specter <==> face specter // EQUI // 5\nof collapse <==> of collapse // EQUI // 5'
|
||||
'Margaret Thatcher former UK PM <==> Margaret Thatcher former UK PM // EQUI // 5\ndead <==> dead // EQUI // 5\nat 87 <==> at 87 // EQUI // 5'
|
||||
'[EU] <==> [EU] // EQUI // 5\n[extends] <==> [extends] // EQUI // 5\n[sanctions] <==> [sanctions] // EQUI // 5\n[against Russia] <==> [against Russia] // EQUI // 5'
|
||||
'Chinese stocks ==> Chinese stocks // EQUI // 5\nopen ==> open // EQUI // 5\nhigher ==> higher // EQUI // 5\nApr 2 ==> Apr 2 // EQUI // 5'
|
||||
'Obama ==> Obama // EQUI // 5\nnames ==> names // EQUI // 5\noutspoken Rice ==> outspoken Rice // EQUI // 5\nas his security adviser ==> as his security adviser // EQUI // 5'
|
||||
'Activists <==> Activists // EQUI // 5\nSyrian airstrike <==> Syrian airstrike // EQUI // 5\nkills <==> kills // EQUI // 5\n21 <==> 21 // EQUI // 5\nin Aleppo <==> in Aleppo // EQUI // 5'
|
||||
'[ Angelina Jolie ] <==> [ Angelina Jolie ] // EQUI // 5\n[ very grateful ] <==> [ very grateful ] // EQUI // 5\n[ for support ] <==> [ for support ] // EQUI // 5\n[ after double mastectomy ] <==> [ after double mastectomy ] // EQUI // 5'
|
||||
'India Ink <==> India Ink // EQUI // 5\nImage <==> Image // EQUI // 5\nof the Day <==> of the Day // EQUI // 5\nApril 21 <==> April 21 // EQUI // 5'
|
||||
'[Deaths] <==> [Deaths] // EQUI // 5\n[in bombings and shootings] <==> [in bombings and shootings] // EQUI // 5\n[across Iraq] <==> [across Iraq] // EQUI // 5'
|
||||
'Suicide bomber <==> Suicide bomber // EQUI // 5\nkills <==> kills // EQUI // 5\nnine police <==> nine police // EQUI // 5\nin north Iraq <==> in north Iraq // EQUI // 5'
|
||||
'Egypt ==> Egypt // EQUI // 5\ns Sisi ==> s Sisi // EQUI // 5\npoised ==> poised // EQUI // 5\nto announce ==> to announce // EQUI // 5\npresidency bid ==> presidency bid // EQUI // 5'
|
||||
'[3] <==> [3] // EQUI // 5\n[killed] <==> [killed] // EQUI // 5\n[6] <==> [6] // EQUI // 5\n[injured] <==> [injured] // EQUI // 5\n[in Afghan blasts] <==> [in Afghan blasts] // EQUI // 5'
|
||||
'Texas senator <==> Texas senator // EQUI // 5\nstill <==> still // EQUI // 5\nfilibustering <==> filibustering // EQUI // 5\nabortion bill <==> abortion bill // EQUI // 5'
|
||||
'Russia ==> Russia // EQUI // 5\ndrops ==> drops // EQUI // 5\ncharges ==> charges // EQUI // 5\nagainst Greenpeace activists ==> against Greenpeace activists // EQUI // 5'
|
||||
'Protesters <==> Protesters // EQUI // 5\nclash <==> clash // EQUI // 5\nwith police <==> with police // EQUI // 5\nin Turkey <==> in Turkey // EQUI // 5'
|
||||
'Nigerian police ==> Nigerian police // EQUI // 5\nban ==> ban // EQUI // 5\nprotests ==> protests // EQUI // 5\nover kidnapped girls ==> over kidnapped girls // EQUI // 5'
|
||||
'Ukraine ==> Ukraine // EQUI // 5\nsigns ==> signs // EQUI // 5\nEU trade pact ==> EU trade pact // EQUI // 5\nas Russia ==> as Russia // EQUI // 5\nfinalizes ==> finalizes // EQUI // 5\nCrimea annexation ==> Crimea annexation // EQUI // 5'
|
||||
'The foundations <==> The foundations // EQUI // 5\nof South Africa <==> of South Africa // EQUI // 5\nare built <==> are built // EQUI // 5\non Nelson Mandela <==> on Nelson Mandela // EQUI // 5\ns memory <==> s memory // EQUI // 5'
|
||||
'On War Criminals and Heroes <==> The Whitewashing // REL // 3\nOn War Criminals and Heroes <==> of Ariel Sharon // REL // 3\n [ ] <==> The Whitewashing // NOALI // 0\n [ ] <==> of Ariel Sharon // NOALI // 0'
|
||||
'Two French journalists <==> Two French journalists // EQUI // 5\nkilled <==> killed // EQUI // 5\nin Mali <==> in Mali // EQUI // 5'
|
||||
'China yuan <==> China yuan // EQUI // 5\nstrengthens <==> strengthens // EQUI // 5\nto 61184 <==> to 61184 // EQUI // 5\nagainst USD <==> against USD // EQUI // 5\nTuesday <==> Tuesday // EQUI // 5'
|
||||
'Malaysia Airlines <==> Malaysia Airlines // EQUI // 5\nloses contact <==> loses contact // EQUI // 5\nwith plane <==> with plane // EQUI // 5\ncarrying <==> carrying // EQUI // 5\n239 <==> 239 // EQUI // 5\nto Beijing <==> to Beijing // EQUI // 5'
|
||||
'Tehelka editor <==> Tehelka editor // EQUI // 5\nembroiled <==> embroiled // EQUI // 5\nin sex assault scam <==> in sex assault scam // EQUI // 5'
|
||||
'Facts ==> Facts // EQUI // 5\nabout North Korea ==> about North Korea // EQUI // 5\ns Musudan missile ==> s Musudan missile // EQUI // 5'
|
||||
'Gunman ==> Gunman // EQUI // 5\nwho ==> who // EQUI // 5\nheld ==> held // EQUI // 5\nsuburban Atlanta firefighters hostage ==> suburban Atlanta firefighters hostage // EQUI // 5\nnow ==> now // EQUI // 5\ndead ==> dead // EQUI // 5\n ==> // NOALI // 0\npolice ==> police // EQUI // 5\nsay ==> say // EQUI // 5'
|
||||
'Crimea parliament <==> Crimea parliament // EQUI // 5\nvotes <==> votes // EQUI // 5\nto join <==> to join // EQUI // 5\nRussia <==> Russia // EQUI // 5'
|
||||
'China stocks ==> China stocks // EQUI // 5\nopen ==> open // EQUI // 5\nlower ==> lower // EQUI // 5\non Wednesday ==> on Wednesday // EQUI // 5'
|
||||
'World News Update <==> World News Update // EQUI // 5\n[] <==> [] // EQUI // 5\nwhat <==> what // EQUI // 5\nyou <==> you // EQUI // 5\nneed <==> need // EQUI // 5\nto know <==> to know // EQUI // 5'
|
||||
'[ Stocks ] <==> [ Stocks ] // EQUI // 5\n[ rise ] <==> [ rise ] // EQUI // 5\n[ in early trading ] <==> [ in early trading ] // EQUI // 5'
|
||||
'Twitter shares <==> Twitter shares // EQUI // 5\nrocket <==> rocket // EQUI // 5\nat opening <==> at opening // EQUI // 5'
|
||||
'Explosions <==> Explosions // EQUI // 5\nkill <==> kill // EQUI // 5\n29 <==> 29 // EQUI // 5\noutside two mosques <==> outside two mosques // EQUI // 5\nin Lebanon s Tripoli <==> in Lebanon s Tripoli // EQUI // 5'
|
||||
'Next Iran nuclear talks <==> Next Iran nuclear talks // EQUI // 5\ndue <==> due // EQUI // 5\non June 1620 <==> on June 1620 // EQUI // 5\nIRNA <==> IRNA // EQUI // 5'
|
||||
'Russian coal mines ==> Russian coal mines // EQUI // 5\nseek ==> seek // EQUI // 5\nnew outlets ==> new outlets // EQUI // 5\nin China ==> in China // EQUI // 5'
|
||||
'Outrage ==> Outrage // EQUI // 5\nafter reports ==> after reports // EQUI // 5\nemerge ==> emerge // EQUI // 5\nabout 6yearold Indian girl ==> about 6yearold Indian girl // EQUI // 5\nraped ==> raped // EQUI // 5\nat school ==> at school // EQUI // 5'
|
||||
'Ovadia Yosef <==> Ovadia Yosef // EQUI // 5\nrabbi and Israeli kingmaker <==> rabbi and Israeli kingmaker // EQUI // 5\ndies <==> dies // EQUI // 5'
|
||||
'Thai protesters <==> Thai protesters // EQUI // 5\ndisrupt <==> disrupt // EQUI // 5\nearly voting <==> early voting // EQUI // 5\nfor disputed election <==> for disputed election // EQUI // 5'
|
||||
'Most victims <==> Most victims // EQUI // 5\nof fiery bus crash <==> of fiery bus crash // EQUI // 5\nidentified <==> identified // EQUI // 5'
|
||||
'Palestinian diplomat ==> Palestinian diplomat // EQUI // 5\nhurt ==> hurt // EQUI // 5\nin blast ==> in blast // EQUI // 5'
|
||||
92
alignments_with_training_headlines2.wa
Normal file
92
alignments_with_training_headlines2.wa
Normal file
@ -0,0 +1,92 @@
|
||||
<sentence id="40" status="">
|
||||
<alignment>
|
||||
0 1 <==> 0 1 // SIMI // 4 // Chinese shares <==> Japanese shares
|
||||
3 <==> 3 // EQUI // 5 // Tuesday <==> Tuesday
|
||||
2 <==> 2 // EQUI // 5 // higher <==> higher
|
||||
2 3 <==> 4 // NOALI // NIL // higher Tuesday <==> -not aligned-
|
||||
1 <==> 1 // EQUI // 5 // close <==> close
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="275" status="">
|
||||
<alignment>
|
||||
0 <==> 4 5 6 // NOALI // NIL // -not aligned- <==> was at Shiite place of worship
|
||||
3 4 <==> 3 4 // EQUI // 5 // 77 <==> 77
|
||||
1 2 <==> 1 2 // EQUI // 5 // killed <==> killed
|
||||
5 6 7 <==> 5 6 // REL // 2 // in Iraq mosque bombing <==> at the mosque
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="56" status="">
|
||||
<alignment>
|
||||
1 2 <==> 0 // NOALI // NIL // on Iraq Funeral <==> -not aligned-
|
||||
0 <==> 1 // NOALI // NIL // -not aligned- <==> Owner
|
||||
2 <==> 3 // NOALI // NIL // Kill <==> -not aligned-
|
||||
3 <==> 3 // EQUI // 5 // 92 <==> 92
|
||||
4 5 <==> 2 4 // EQUI // 5 // Other Attacks <==> Other Deadly Attacks
|
||||
0 <==> 5 6 // NOALI // NIL // Assault <==> -not aligned-
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="290" status="">
|
||||
<alignment>
|
||||
0 1 2 <==> 0 1 2 // EQUI // 5 // Los Angeles airport worker <==> Los Angeles airport worker
|
||||
3 <==> 3 // EQUI // 5 // charged <==> charged
|
||||
4 5 6 <==> 4 5 6 // EQUI // 5 // with making 911 threats <==> with making 911 threats
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="298" status="">
|
||||
<alignment>
|
||||
1 2 3 <==> 0 1 // SIMI // 4 // Bangladesh factory owners <==> Singapore cruise operators
|
||||
5 6 <==> 3 // NOALI // NIL // as death toll <==> -not aligned-
|
||||
2 <==> 2 // EQUI // 5 // arrested <==> arrested
|
||||
3 <==> 4 // NOALI // NIL // nears <==> -not aligned-
|
||||
4 <==> 5 // EQUI // 5 // 350 <==> 350
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="1" status="">
|
||||
<alignment>
|
||||
[0] [1] <==> [0] [1] // EQUI // 5 // China's Peace Ark <==> China's Peace Ark
|
||||
[3] [4] <==> [3] [4] // EQUI // 5 // for the Philippines <==> for the Philippines
|
||||
[5] <==> [5] // EQUI // 5 // Thursday <==> Thursday
|
||||
[2] <==> [2] // EQUI // 5 // departs <==> departs
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="233" status="">
|
||||
<alignment>
|
||||
[ Fire in Russian psychiatric hospital ] <==> [ Fire in Russian psychiatric hospital ] // EQUI // 5
|
||||
[ kills ] <==> [ kills ] // EQUI // 5
|
||||
[ 38 ] <==> [ 38 ] // EQUI // 5
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="338" status="">
|
||||
<alignment>
|
||||
2 <==> 0 // NOALI // NIL // face specter <==> -not aligned-
|
||||
3 <==> 0 1 2 // REL // 1 // of collapse <==> shows signs of tension
|
||||
0 <==> 2 // NOALI // NIL // Syria opposition unity <==> -not aligned-
|
||||
1 <==> 1 // EQUI // 5 // take <==> take
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="102" status="">
|
||||
<alignment>
|
||||
1 <==> 1 // EQUI // 5 // rise <==> rise
|
||||
2 <==> 2 // EQUI // 5 // in Israeli air strikes <==> in Israeli air strikes
|
||||
3 4 <==> 3 4 // EQUI // 5 // on Gaza <==> on Gaza
|
||||
0 <==> 0 // EQUI // 5 // Deaths <==> Deaths
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="10" status="">
|
||||
<alignment>
|
||||
1 2 <==> 1 2 // EQUI // 5 // to execute <==> to execute
|
||||
3 <==> 3 // EQUI // 5 // Palestinian captives <==> Palestinian captives
|
||||
0 <==> 0 // EQUI // 5 // Israeli calls <==> Israeli calls
|
||||
4 5 <==> 4 5 // EQUI // 5 // instead of releasing them <==> instead of releasing them
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
45
alignments_with_training_student.wa
Normal file
45
alignments_with_training_student.wa
Normal file
@ -0,0 +1,45 @@
|
||||
<sentence id="320" status="">
|
||||
<alignment>
|
||||
1 <==> 0 // NOALI // NIL // Bulb terminals 3 and 4 <==> -not aligned-
|
||||
2 3 <==> 1 2 3 // REL // 3 // Terminal 1 and the positive terminal <==> Terminal 1 and the positive terminal
|
||||
4 <==> 4 // EQUI // 5 // are separated <==> are separated
|
||||
0 <==> 5 // NOALI // NIL // -not aligned- <==> by the gap
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="261" status="">
|
||||
<alignment>
|
||||
1 <==> 3 4 // REL // 3 // the the bulb terminal <==> the terminals
|
||||
5 <==> 1 // NOALI // NIL // disconnected <==> -not aligned-
|
||||
0 <==> 2 // NOALI // NIL // -not aligned- <==> are not connected
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="115" status="">
|
||||
<alignment>
|
||||
1 <==> 0 // NOALI // NIL // Because <==> -not aligned-
|
||||
4 <==> 5 // EQUI // 5 // The bulb <==> The bulb
|
||||
3 <==> 2 // EQUI // 5 // does not have <==> is not
|
||||
6 7 <==> 8 9 // EQUI // 5 // in the closed path <==> in the closed path containing
|
||||
2 <==> 1 // EQUI // 5 // a closed path <==> closed path
|
||||
0 <==> 3 // NOALI // NIL // -not aligned- <==> the battery
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="221" status="">
|
||||
<alignment>
|
||||
1 2 3 4 <==> 2 3 4 5 // EQUI // 5 // Bulbs B and C <==> B and C
|
||||
5 6 <==> 7 8 // EQUI // 5 // are contained <==> are
|
||||
7 8 9 10 <==> 6 7 8 9 // EQUI // 5 // on the same path <==> in the same path with the battery
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
<sentence id="311" status="">
|
||||
<alignment>
|
||||
1 <==> 0 // NOALI // NIL // Because <==> -not aligned-
|
||||
4 <==> 6 7 8 // EQUI // 5 // from the positive battery terminal <==> by the gap
|
||||
2 3 <==> 1 2 3 // EQUI // 5 // is seperated <==> are separated
|
||||
0 <==> 4 // NOALI // NIL // -not aligned- <==> Terminal 1
|
||||
</alignment>
|
||||
</sentence>
|
||||
|
||||
@ -2,38 +2,34 @@ import processing
|
||||
import pandas as pd
|
||||
import gpt_alignment
|
||||
|
||||
# paths to students andsewrs database
|
||||
studentAnswers1_path = "test_goldStandard/student/STSint.testinput.answers-students.sent1.txt"
|
||||
studentAnswers2_path = "test_goldStandard/student/STSint.testinput.answers-students.sent2.txt"
|
||||
studentAnsewrs_chunked_path1 = "test_goldStandard/student/STSint.testinput.answers-students.sent1.chunk.txt"
|
||||
studentAnsewrs_chunked_path2 = "test_goldStandard/student/STSint.testinput.answers-students.sent2.chunk.txt"
|
||||
studentsAnsewrs_alignment_path = "test_goldStandard/student/STSint.testinput.answers-students.wa"
|
||||
# Specify the output file path
|
||||
file_path = "alignments_unformatted_headlines.txt"
|
||||
|
||||
# paths to students answers database
|
||||
#chunked_path1 = "test_goldStandard/student/STSint.testinput.answers-students.sent1.chunk.txt"
|
||||
#chunked_path2 = "test_goldStandard/student/STSint.testinput.answers-students.sent2.chunk.txt"
|
||||
#alignment_path = "test_goldStandard/student/STSint.testinput.answers-students.wa"
|
||||
|
||||
# paths to headlines
|
||||
chunked_path1 = "test_goldStandard/headlines/STSint.testinput.headlines.sent1.chunk.txt"
|
||||
# Chunked *second* sentences of each headline pair; this must be the sent2 file,
# otherwise every pair is compared against itself.
chunked_path2 = "test_goldStandard/headlines/STSint.testinput.headlines.sent2.chunk.txt"
|
||||
alignment_path = "test_goldStandard/headlines/STSint.testinput.headlines.wa"
|
||||
|
||||
# load data
|
||||
studentAnserws = processing.load_sentences(studentAnswers1_path, studentAnswers1_path)
|
||||
goldstandard_chunked = processing.load_chunked(studentAnsewrs_chunked_path1, studentAnsewrs_chunked_path2)
|
||||
goldstandard_alignment = processing.load_alignment(studentsAnsewrs_alignment_path)
|
||||
#studentAnserws = processing.load_sentences(studentAnswers1_path, studentAnswers1_path)
|
||||
goldstandard_chunked = processing.load_chunked(chunked_path1, chunked_path2)
|
||||
goldstandard_alignment = processing.load_alignment(alignment_path)
|
||||
|
||||
# merge the chunked sentence pairs with their gold alignments into one table
|
||||
data = pd.merge(goldstandard_chunked, goldstandard_alignment, left_index=True, right_index=True)
|
||||
print(data)
|
||||
#print(data)
|
||||
|
||||
data_for_chat = processing.get_chunks_as_text(data)
|
||||
|
||||
# generate a few examples
|
||||
#for i in range(1, 10):
|
||||
# print(processing.generate_alignment_format(data, i))
|
||||
# print("correct anwser for this is: ")
|
||||
# print(data["alignment_text"][i])
|
||||
# best prompt so far
|
||||
data_for_chat, indexes = processing.get_chunks_as_text(data)
|
||||
|
||||
client = gpt_alignment.createGPT()
|
||||
responses = []
|
||||
for i in range(0, len(data_for_chat)):
|
||||
responses.append(gpt_alignment.callApi(client, data_for_chat[i]))
|
||||
|
||||
# Specify the file path
|
||||
file_path = "alignments_unformatted_student.txt"
|
||||
|
||||
# Writing to the file with repr() to preserve "\n" characters
|
||||
with open(file_path, 'w') as file:
|
||||
|
||||
46
create_alignments_with_examples.py
Normal file
46
create_alignments_with_examples.py
Normal file
@ -0,0 +1,46 @@
|
||||
import processing
|
||||
import pandas as pd
|
||||
import gpt_alignment
|
||||
|
||||
# Output file that receives the generated alignments, one <sentence> element per pair.
file_path = "alignments_with_training_headlines2.wa"

# Paths to the student-answers dataset (disabled; swap with the block below to use it).
#chunked_path1 = "test_goldStandard/student/STSint.testinput.answers-students.sent1.chunk.txt"
#chunked_path2 = "test_goldStandard/student/STSint.testinput.answers-students.sent2.chunk.txt"
#alignment_path = "test_goldStandard/student/STSint.testinput.answers-students.wa"

# Paths to the headlines dataset.
chunked_path1 = "test_goldStandard/headlines/STSint.testinput.headlines.sent1.chunk.txt"
# The second path must be the sent2 file; pointing both paths at sent1 makes
# every pair compare a sentence with itself.
chunked_path2 = "test_goldStandard/headlines/STSint.testinput.headlines.sent2.chunk.txt"
alignment_path = "test_goldStandard/headlines/STSint.testinput.headlines.wa"

# Upper bound on the number of test pairs sent to the API in a single run.
MAX_REQUESTS = 10

# Load the chunked sentence pairs and the gold-standard alignments.
goldstandard_chunked = processing.load_chunked(chunked_path1, chunked_path2)
goldstandard_alignment = processing.load_alignment(alignment_path)

# Join chunks and alignments on the row index so each row describes one pair.
data = pd.merge(goldstandard_chunked, goldstandard_alignment, left_index=True, right_index=True)

train, test = processing.generate_train_test_split(data)

data_for_chat, indexes = processing.get_chunks_as_text(test)
_, indexes_of_training = processing.get_chunks_as_text(train)
# Shift both index lists by one before printing and writing them out.
indexes_of_training = [i + 1 for i in indexes_of_training]
indexes = [i + 1 for i in indexes]
print(indexes_of_training)
print(indexes)

client = gpt_alignment.createGPT()
responses = []
# Cap the number of requests, but never index past the end of the test data.
for i in range(min(MAX_REQUESTS, len(data_for_chat))):
    responses.append([gpt_alignment.callApi_examples(client, train, data_for_chat[i]), indexes[i]])

# Explicit encoding so the output does not depend on the platform default.
with open(file_path, 'w', encoding="utf-8") as file:
    for alignment_text, sentence_index in responses:
        # NOTE(review): `indexes` was already incremented above, so this adds a
        # second +1 to the written id -- confirm this matches the gold .wa ids.
        file.write("<sentence id=\"" + str(sentence_index + 1) + "\" status=\"\">\n")
        file.write("<alignment>\n")
        file.write(alignment_text)
        file.write("\n</alignment>\n")
        file.write("</sentence>\n\n")
|
||||
@ -1,289 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"{\n",
|
||||
" \"cells\": [\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"code\",\n",
|
||||
" \"execution_count\": 58,\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"outputs\": [],\n",
|
||||
" \"source\": [\n",
|
||||
" \"import pandas as pd\\n\",\n",
|
||||
" \"import numpy as np\\n\",\n",
|
||||
" \"from lxml import etree\"\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"markdown\",\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"source\": [\n",
|
||||
" \"# Input format\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"The input consists of two files:\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"* a file with the first sentences in each pair\\n\",\n",
|
||||
" \"* a file with the second sentences in each pair\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"The sentences are tokenized.\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"Please check STSint.input.*.sent1.txt and STSint.*.input.sent2.txt\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"Participants can also use the input sentences with gold standard chunks:\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"* a file with the first sentences in each pair, with '[' and ']' to mark chunks\\n\",\n",
|
||||
" \"* a file with the second sentences in each pair, with '[' and ']' to mark chunks\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"Please check STSint.input.*.sent1.chunk.txt and STSint.input.*.sent2.chunk.txt\"\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"code\",\n",
|
||||
" \"execution_count\": 26,\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"outputs\": [],\n",
|
||||
" \"source\": [\n",
|
||||
" \"# loading unchunked headlines\\n\",\n",
|
||||
" \"unchunked_path_1 = \\\"test_goldstandard/STSint.testinput.headlines.sent1.txt\\\"\\n\",\n",
|
||||
" \"unchunked_path_2 = \\\"test_goldstandard/STSint.testinput.headlines.sent2.txt\\\"\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"headlines_sentance1 = pd.read_csv(unchunked_path_1, dtype=str, delimiter=\\\"}\\\", header=None)\\n\",\n",
|
||||
" \"headlines_sentance1.columns = [\\\"headlines_sentance1\\\"]\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"headlines_sentance2 = pd.read_csv(unchunked_path_2, dtype=str, delimiter=\\\"}\\\", header=None)\\n\",\n",
|
||||
" \"headlines_sentance2.columns = [\\\"headlines_sentance2\\\"]\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"headlines = pd.concat([headlines_sentance1, headlines_sentance2], axis=1)\"\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"code\",\n",
|
||||
" \"execution_count\": 22,\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"outputs\": [],\n",
|
||||
" \"source\": [\n",
|
||||
" \"# loading unchunked images\\n\",\n",
|
||||
" \"unchunked_path_1 = \\\"test_goldstandard/STSint.testinput.images.sent1.txt\\\"\\n\",\n",
|
||||
" \"unchunked_path_2 = \\\"test_goldstandard/STSint.testinput.images.sent2.txt\\\"\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"image_sentance1 = pd.read_csv(unchunked_path_1, dtype=str, delimiter=\\\"}\\\", header=None)\\n\",\n",
|
||||
" \"image_sentance1.columns = [\\\"image_sentance1\\\"]\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"image_sentance2 = pd.read_csv(unchunked_path_2, dtype=str, delimiter=\\\"}\\\", header=None)\\n\",\n",
|
||||
" \"image_sentance2.columns = [\\\"image_sentance2\\\"]\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"images = pd.concat([image_sentance1, image_sentance2], axis=1)\"\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"code\",\n",
|
||||
" \"execution_count\": 23,\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"outputs\": [],\n",
|
||||
" \"source\": [\n",
|
||||
" \"# loading unchunked students\\n\",\n",
|
||||
" \"unchunked_path_1 = \\\"test_goldstandard/STSint.testinput.answers-students.sent1.txt\\\"\\n\",\n",
|
||||
" \"unchunked_path_2 = \\\"test_goldstandard/STSint.testinput.answers-students.sent2.txt\\\"\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"student_sentance1 = pd.read_csv(unchunked_path_1, dtype=str, delimiter=\\\"}\\\", header=None)\\n\",\n",
|
||||
" \"student_sentance1.columns = [\\\"student_sentance1\\\"]\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"student_sentance2 = pd.read_csv(unchunked_path_2, dtype=str, delimiter=\\\"}\\\", header=None)\\n\",\n",
|
||||
" \"student_sentance2.columns = [\\\"student_sentance2\\\"]\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"students = pd.concat([student_sentance1, student_sentance2], axis=1)\"\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"code\",\n",
|
||||
" \"execution_count\": 31,\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"outputs\": [],\n",
|
||||
" \"source\": [\n",
|
||||
" \"def chunk2list(chunks:str) -> list:\\n\",\n",
|
||||
" \" \\\"\\\"\\\"\\n\",\n",
|
||||
" \" Takes str that is all chunks from a chunked sentance and returns a list of all the chunks as seperate items \\n\",\n",
|
||||
" \" \\\"\\\"\\\"\\n\",\n",
|
||||
" \" chunks = chunks.replace('[', '')\\n\",\n",
|
||||
" \" chunks = chunks.replace(']', '')\\n\",\n",
|
||||
" \" chunks = chunks.replace(' ', '|')\\n\",\n",
|
||||
" \" split = chunks.split('|')\\n\",\n",
|
||||
" \" return split\"\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"code\",\n",
|
||||
" \"execution_count\": 33,\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"outputs\": [],\n",
|
||||
" \"source\": [\n",
|
||||
" \"# loading chunked headlines\\n\",\n",
|
||||
" \"chunked_path_1 = \\\"test_goldstandard/STSint.testinput.headlines.sent1.chunk.txt\\\"\\n\",\n",
|
||||
" \"chunked_path_2 = \\\"test_goldstandard/STSint.testinput.headlines.sent2.chunk.txt\\\"\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"headlines_chunked_sentance1 = pd.read_csv(chunked_path_1, dtype=str, delimiter=\\\"}\\\", header=None)\\n\",\n",
|
||||
" \"headlines_chunked_sentance1.columns = [\\\"headlines_chunked_sentance1\\\"]\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"headlines_chunked_sentance2 = pd.read_csv(chunked_path_2, dtype=str, delimiter=\\\"}\\\", header=None)\\n\",\n",
|
||||
" \"headlines_chunked_sentance2.columns = [\\\"headlines_chunked_sentance2\\\"]\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"headlines_chunked = pd.concat([headlines_chunked_sentance1, headlines_chunked_sentance2], axis=1)\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"# convert chunks from str to list\\n\",\n",
|
||||
" \"headlines_chunked['headlines_chunked_sentance1'] = headlines_chunked['headlines_chunked_sentance1'].apply(chunk2list)\\n\",\n",
|
||||
" \"headlines_chunked['headlines_chunked_sentance2'] = headlines_chunked['headlines_chunked_sentance2'].apply(chunk2list)\"\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"code\",\n",
|
||||
" \"execution_count\": 72,\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"outputs\": [],\n",
|
||||
" \"source\": [\n",
|
||||
" \"headlines_aligned_path = \\\"test_goldstandard/STSint.testinput.headlines.wa\\\" \\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"with open(headlines_aligned_path, 'r') as file:\\n\",\n",
|
||||
" \" file_content = file.read()\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"# <==> and & break xml loaders so it needs to be replaces with something else\\n\",\n",
|
||||
" \"modified_content = file_content.replace('<==>', 'ARROWS_PLACEHOLDER').replace('&', 'AMPERSAND_PLACEHOLDER')\\n\",\n",
|
||||
" \"# it also needs a root wrapped to function properly \\n\",\n",
|
||||
" \"modified_content = f'<root>{modified_content}</root>'\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"modified_file_path = 'test_goldstandard/STSint.testinput.headlines.fixedarrows.wa'\\n\",\n",
|
||||
" \"with open(modified_file_path, 'w') as modified_file:\\n\",\n",
|
||||
" \" modified_file.write(modified_content)\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"# Parse the modified file using ElementTree\\n\",\n",
|
||||
" \"tree = etree.parse(modified_file_path)\\n\",\n",
|
||||
" \"root = tree.getroot()\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"# function for printing XML\\n\",\n",
|
||||
" \"def prettyprint(element, **kwargs):\\n\",\n",
|
||||
" \" xml = etree.tostring(element, pretty_print=True, **kwargs)\\n\",\n",
|
||||
" \" print(xml.decode(), end='')\"\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"code\",\n",
|
||||
" \"execution_count\": 85,\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"outputs\": [],\n",
|
||||
" \"source\": [\n",
|
||||
" \"# get ansewrs\\n\",\n",
|
||||
" \"alignments_data = []\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"for alignment in root.xpath('//alignment'):\\n\",\n",
|
||||
" \" # Extract relevant information from the alignment element\\n\",\n",
|
||||
" \" data = {\\n\",\n",
|
||||
" \" 'sentence_id': alignment.xpath('ancestor::sentence/@id')[0],\\n\",\n",
|
||||
" \" 'alignment_text': alignment.text\\n\",\n",
|
||||
" \" }\\n\",\n",
|
||||
" \" alignments_data.append(data)\\n\"\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"code\",\n",
|
||||
" \"execution_count\": 88,\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"outputs\": [\n",
|
||||
" {\n",
|
||||
" \"name\": \"stdout\",\n",
|
||||
" \"output_type\": \"stream\",\n",
|
||||
" \"text\": [\n",
|
||||
" \"\\n\",\n",
|
||||
" \"6 7 8 ARROWS_PLACEHOLDER 5 6 // EQUI // 5 // for the Philippines ARROWS_PLACEHOLDER to Philippines \\n\",\n",
|
||||
" \"5 ARROWS_PLACEHOLDER 2 // SIMI // 3 // departs ARROWS_PLACEHOLDER sends \\n\",\n",
|
||||
" \"9 ARROWS_PLACEHOLDER 0 // NOALI // NIL // Thursday ARROWS_PLACEHOLDER -not aligned- \\n\",\n",
|
||||
" \"1 ARROWS_PLACEHOLDER 1 // EQUI // 5 // China ARROWS_PLACEHOLDER China \\n\",\n",
|
||||
" \"2 3 4 ARROWS_PLACEHOLDER 3 4 // REL // 4 // 's Peace Ark ARROWS_PLACEHOLDER aid team \\n\",\n",
|
||||
" \"\\n\"\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"source\": [\n",
|
||||
" \"# test out the format\\n\",\n",
|
||||
" \"print(alignments_data[0][\\\"alignment_text\\\"])\"\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"code\",\n",
|
||||
" \"execution_count\": 107,\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"outputs\": [],\n",
|
||||
" \"source\": [\n",
|
||||
" \"y = pd.DataFrame(alignments_data)\\n\",\n",
|
||||
" \"y = y.drop(columns=[\\\"sentence_id\\\"])\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"#return to <==> and &\\n\",\n",
|
||||
" \"def return_characteers(cell: str) -> str:\\n\",\n",
|
||||
" \" cell = cell.replace('ARROWS_PLACEHOLDER', '<==>')\\n\",\n",
|
||||
" \" cell = cell.replace('AMPERSAND_PLACEHOLDER', '&')\\n\",\n",
|
||||
" \" return cell\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"y[\\\"alignment_text\\\"] = y[\\\"alignment_text\\\"].apply(return_characteers)\"\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"code\",\n",
|
||||
" \"execution_count\": 108,\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"outputs\": [\n",
|
||||
" {\n",
|
||||
" \"name\": \"stderr\",\n",
|
||||
" \"output_type\": \"stream\",\n",
|
||||
" \"text\": [\n",
|
||||
" \"c:\\\\Users\\\\Mati\\\\AppData\\\\Local\\\\Programs\\\\Python\\\\Python312\\\\Lib\\\\site-packages\\\\numpy\\\\core\\\\fromnumeric.py:59: FutureWarning: 'DataFrame.swapaxes' is deprecated and will be removed in a future version. Please use 'DataFrame.transpose' instead.\\n\",\n",
|
||||
" \" return bound(*args, **kwds)\\n\"\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"source\": [\n",
|
||||
" \"# generate train test split\\n\",\n",
|
||||
" \"x = headlines_chunked\\n\",\n",
|
||||
" \"y = y\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"data = pd.merge(x, y, left_index=True, right_index=True)\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"train, validate, test = np.split(data.sample(frac=1, random_state=42), [int(.6*len(data)), int(.8*len(data))])\"\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"metadata\": {\n",
|
||||
" \"kernelspec\": {\n",
|
||||
" \"display_name\": \"Python 3\",\n",
|
||||
" \"language\": \"python\",\n",
|
||||
" \"name\": \"python3\"\n",
|
||||
" },\n",
|
||||
" \"language_info\": {\n",
|
||||
" \"codemirror_mode\": {\n",
|
||||
" \"name\": \"ipython\",\n",
|
||||
" \"version\": 3\n",
|
||||
" },\n",
|
||||
" \"file_extension\": \".py\",\n",
|
||||
" \"mimetype\": \"text/x-python\",\n",
|
||||
" \"name\": \"python\",\n",
|
||||
" \"nbconvert_exporter\": \"python\",\n",
|
||||
" \"pygments_lexer\": \"ipython3\",\n",
|
||||
" \"version\": \"3.12.0\"\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"nbformat\": 4,\n",
|
||||
" \"nbformat_minor\": 2\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
176
examples.txt
176
examples.txt
@ -1,176 +0,0 @@
|
||||
seq1:
|
||||
1) terminal 1 and the positive terminal
|
||||
2) are connected.
|
||||
|
||||
seq2:
|
||||
1) Terminal 1 and the positive terminal
|
||||
2) are separated
|
||||
3) by the gap
|
||||
|
||||
correct anwser for this is:
|
||||
|
||||
7 8 <==> 7 8 // OPPO // 4 // are connected. <==> are separated
|
||||
0 <==> 9 10 11 // NOALI // NIL // -not aligned- <==> by the gap
|
||||
1 2 3 4 5 6 <==> 1 2 3 4 5 6 // EQUI // 5 // terminal 1 and the positive terminal <==> Terminal 1 and the positive terminal
|
||||
|
||||
seq1:
|
||||
1) positive battery
|
||||
2) is seperated
|
||||
3) by a gap
|
||||
4) from terminal 2
|
||||
|
||||
seq2:
|
||||
1) Terminal 2 and the positive terminal
|
||||
2) are separated
|
||||
3) by the gap
|
||||
|
||||
correct anwser for this is:
|
||||
|
||||
1 2 8 9 10 <==> 1 2 3 4 5 6 // EQUI // 5 // positive battery from terminal 2 <==> Terminal 2 and the positive terminal
|
||||
5 6 7 <==> 9 10 11 // EQUI // 5 // by a gap <==> by the gap
|
||||
3 4 <==> 7 8 // EQUI // 5 // is seperated <==> are separated
|
||||
|
||||
seq1:
|
||||
1) There
|
||||
2) is
|
||||
3) no difference
|
||||
4) between the two terminals.
|
||||
|
||||
seq2:
|
||||
1) The terminals
|
||||
2) are
|
||||
3) in the same state.
|
||||
|
||||
correct anwser for this is:
|
||||
|
||||
1 <==> 0 // NOALI // NIL // There <==> -not aligned-
|
||||
3 4 <==> 4 5 6 7 // SIMI // 3 // no difference <==> in the same state.
|
||||
2 <==> 3 // EQUI // 5 // is <==> are
|
||||
5 6 7 8 <==> 1 2 // SPE1 // 4 // between the two terminals. <==> The terminals
|
||||
|
||||
seq1:
|
||||
1) the switch
|
||||
2) has to be contained
|
||||
3) in the same path
|
||||
4) as
|
||||
5) the bulb and the battery
|
||||
|
||||
seq2:
|
||||
1) The switch and the bulb
|
||||
2) have to be
|
||||
3) in the same path
|
||||
|
||||
correct anwser for this is:
|
||||
|
||||
7 8 9 10 <==> 9 10 11 12 // EQUI // 5 // in the same path <==> in the same path
|
||||
3 4 5 6 <==> 6 7 8 // EQUI // 5 // has to be contained <==> have to be
|
||||
1 2 12 13 14 15 16 <==> 1 2 3 4 5 // SPE2 // 4 // the switch the bulb and the battery <==> The switch and the bulb
|
||||
11 <==> 0 // NOALI // NIL // as <==> -not aligned-
|
||||
|
||||
seq1:
|
||||
1) there
|
||||
2) is
|
||||
3) a gap
|
||||
4) in terminal 1
|
||||
|
||||
seq2:
|
||||
1) Terminals 1 and 2
|
||||
2) are
|
||||
3) in the same electrical state
|
||||
|
||||
correct anwser for this is:
|
||||
|
||||
5 6 7 <==> 1 2 3 4 // SPE1 // 4 // in terminal 1 <==> Terminals 1 and 2
|
||||
1 <==> 0 // NOALI // NIL // there <==> -not aligned-
|
||||
3 4 <==> 6 7 8 9 10 // REL // 2 // a gap <==> in the same electrical state
|
||||
2 <==> 5 // SIMI // 3 // is <==> are
|
||||
|
||||
seq1:
|
||||
1) The battery
|
||||
2) uses
|
||||
3) the chemical reation
|
||||
4) to create
|
||||
5) voltage
|
||||
|
||||
seq2:
|
||||
1) A battery
|
||||
2) uses
|
||||
3) a chemical reaction
|
||||
4) to maintain
|
||||
5) different electrical states
|
||||
6) at the terminals
|
||||
|
||||
correct anwser for this is:
|
||||
|
||||
3 <==> 3 // EQUI // 5 // uses <==> uses
|
||||
7 8 <==> 7 8 // SIMI // 2 // to create <==> to maintain
|
||||
4 5 6 <==> 4 5 6 // EQUI // 5 // the chemical reation <==> a chemical reaction
|
||||
9 <==> 9 10 11 // REL // 3 // voltage <==> different electrical states
|
||||
0 <==> 12 13 14 // NOALI // NIL // -not aligned- <==> at the terminals
|
||||
1 2 <==> 1 2 // EQUI // 5 // The battery <==> A battery
|
||||
|
||||
seq1:
|
||||
1) a batter
|
||||
2) uses
|
||||
3) a chemical reaction
|
||||
4) to create
|
||||
5) voltage
|
||||
|
||||
seq2:
|
||||
1) A battery
|
||||
2) uses
|
||||
3) a chemical reaction
|
||||
4) to maintain
|
||||
5) different electrical states
|
||||
6) at the terminals
|
||||
|
||||
correct anwser for this is:
|
||||
|
||||
3 <==> 3 // EQUI // 5 // uses <==> uses
|
||||
7 8 <==> 7 8 // SIMI // 2 // to create <==> to maintain
|
||||
9 <==> 9 10 11 // REL // 3 // voltage <==> different electrical states
|
||||
4 5 6 <==> 4 5 6 // EQUI // 5 // a chemical reaction <==> a chemical reaction
|
||||
1 2 <==> 1 2 // EQUI // 5 // a batter <==> A battery
|
||||
0 <==> 12 13 14 // NOALI // NIL // -not aligned- <==> at the terminals
|
||||
|
||||
seq1:
|
||||
1) the battery
|
||||
2) alone
|
||||
3) is
|
||||
4) in a closed path
|
||||
|
||||
seq2:
|
||||
1) The battery
|
||||
2) is contained
|
||||
3) in a path
|
||||
4) which
|
||||
5) does not contain any other components
|
||||
|
||||
correct anwser for this is:
|
||||
|
||||
1 2 <==> 1 2 // EQUI // 5 // the battery <==> The battery
|
||||
4 <==> 3 4 // EQUI // 5 // is <==> is contained
|
||||
5 6 7 8 <==> 5 6 7 // EQUI // 5 // in a closed path <==> in a path
|
||||
3 <==> 9 10 11 12 13 14 // EQUI // 5 // alone <==> does not contain any other components
|
||||
5 6 7 8 <==> 8 // EQUI // 5 // in a closed path <==> which
|
||||
|
||||
seq1:
|
||||
1) Because
|
||||
2) there
|
||||
3) is
|
||||
4) a gap
|
||||
5) in the circuit.
|
||||
|
||||
seq2:
|
||||
1) there
|
||||
2) is
|
||||
3) a gap
|
||||
4) in the circuit
|
||||
|
||||
correct anwser for this is:
|
||||
|
||||
2 <==> 1 // EQUI // 5 // there <==> there
|
||||
4 5 <==> 3 4 // EQUI // 5 // a gap <==> a gap
|
||||
1 <==> 0 // NOALI // NIL // Because <==> -not aligned-
|
||||
6 7 8 <==> 5 6 7 // EQUI // 5 // in the circuit. <==> in the circuit
|
||||
3 <==> 2 // EQUI // 5 // is <==> is
|
||||
@ -1,24 +1,29 @@
|
||||
import pandas as pd
|
||||
import processing
|
||||
import re
|
||||
import copy
|
||||
|
||||
# output file name
|
||||
file_path = "headlines_fixed_format.wa"
|
||||
|
||||
# paths to the student answers database
|
||||
studentAnswers1_path = "test_goldStandard/student/STSint.testinput.answers-students.sent1.txt"
|
||||
studentAnswers2_path = "test_goldStandard/student/STSint.testinput.answers-students.sent2.txt"
|
||||
studentAnsewrs_chunked_path1 = "test_goldStandard/student/STSint.testinput.answers-students.sent1.chunk.txt"
|
||||
studentAnsewrs_chunked_path2 = "test_goldStandard/student/STSint.testinput.answers-students.sent2.chunk.txt"
|
||||
studentsAnsewrs_alignment_path = "test_goldStandard/student/STSint.testinput.answers-students.wa"
|
||||
#chunked_path1 = "test_goldStandard/student/STSint.testinput.answers-students.sent1.chunk.txt"
|
||||
#chunked_path2 = "test_goldStandard/student/STSint.testinput.answers-students.sent2.chunk.txt"
|
||||
#alignment_path = "test_goldStandard/student/STSint.testinput.answers-students.wa"
|
||||
|
||||
# Paths to the headlines gold-standard data: one chunk file per sentence of
# each pair, plus the gold alignment (.wa) file.
chunked_path1 = "test_goldStandard/headlines/STSint.testinput.headlines.sent1.chunk.txt"
# The second path has to name the sent2 file; with sent1 here both arguments
# of load_chunked would read the same file.
chunked_path2 = "test_goldStandard/headlines/STSint.testinput.headlines.sent2.chunk.txt"
alignment_path = "test_goldStandard/headlines/STSint.testinput.headlines.wa"
|
||||
|
||||
# load data
|
||||
studentAnserws = processing.load_sentences(studentAnswers1_path, studentAnswers1_path)
|
||||
goldstandard_chunked = processing.load_chunked(studentAnsewrs_chunked_path1, studentAnsewrs_chunked_path2)
|
||||
goldstandard_alignment = processing.load_alignment(studentsAnsewrs_alignment_path)
|
||||
goldstandard_chunked = processing.load_chunked(chunked_path1, chunked_path2)
|
||||
goldstandard_alignment = processing.load_alignment(alignment_path)
|
||||
|
||||
# get a nice anwser-student table
|
||||
# get a nice table
|
||||
data = pd.merge(goldstandard_chunked, goldstandard_alignment, left_index=True, right_index=True)
|
||||
|
||||
|
||||
file_path = "alignments_unformatted_student.txt"
|
||||
file_path = "alignments_unformatted_headlines.txt"
|
||||
|
||||
# open generated alignments
|
||||
with open(file_path, 'r') as file:
|
||||
@ -28,46 +33,118 @@ for i, r in enumerate(responses):
|
||||
print("\nresponse number " + str(i))
|
||||
print(r)
|
||||
|
||||
unformatted = copy.deepcopy(responses)
|
||||
|
||||
for i, response in enumerate(responses):
|
||||
temp = response
|
||||
temp = response.lstrip("\n")
|
||||
temp = temp.rstrip("\n")
|
||||
|
||||
temp = re.sub(r'(?<==)>', '> ', temp) # add space after >
|
||||
temp = re.sub(r'\s{2,}', ' ', temp) # remove double space
|
||||
temp = re.sub(r'(?<!\n) +(?!\n)', ' ', temp) # remove double space
|
||||
|
||||
temp = temp.replace("]", "")
|
||||
temp = temp.replace("[", "")
|
||||
temp = temp.replace("\'", "")
|
||||
temp = temp.replace(" ==> ", " <==> ")
|
||||
temp = temp.replace(" => ", " <==> ")
|
||||
temp = temp.replace(" <=> ", " <==> ")
|
||||
temp = temp.replace(" <== ", " <==> ")
|
||||
temp = temp.replace("<==> //", " <==> 0 //")
|
||||
temp = temp.replace("<==> //", " <==> 0 //")
|
||||
temp = temp.replace("NOALI <==>", "0 <==>")
|
||||
temp = temp.replace("<==> NOALI", "<==> 0")
|
||||
|
||||
temp = re.sub(r'^(<==>)', r'0 \1', temp)
|
||||
|
||||
temp = re.sub(r'\s{2,}', ' ', temp) # remove double space
|
||||
|
||||
#for j, chunk in enumerate(data.iloc[i]["chunked_sentance1"].sorted(key=len, reverse=True)):
|
||||
for j, chunk in enumerate(sorted(data.iloc[i]["chunked_sentance1"], key=lambda x: len(x), reverse=True)):
|
||||
temp = temp.replace(chunk, str(j+1))
|
||||
for j, chunk in enumerate(sorted(data.iloc[i]["chunked_sentance2"], key=lambda x: len(x), reverse=True)):
|
||||
temp = temp.replace(chunk, str(j+1))
|
||||
temp = temp.replace("\n// NOALI", "\n0 <==> 0 // NOALI")
|
||||
temp = temp.replace("\n // ", "\n0 <==> ")
|
||||
temp = temp.replace("\n// ", "\n0 <==> ")
|
||||
temp = re.sub(r'(^|[^<])(==>+)', r' <==>', temp)
|
||||
temp = temp.replace("// - //", "// 0 //")
|
||||
temp = temp.replace("// score", "// ")
|
||||
temp = temp.replace("// alignment type", "// NOALI")
|
||||
temp = temp.replace("> -", "> 0")
|
||||
temp = temp.replace("- <", "0 <")
|
||||
temp = temp.replace("// //", "// NOALI //")
|
||||
temp = temp.replace("equi", "EQUI")
|
||||
temp = re.sub(r'\d\. ', '', temp) # remove 1., 2. ...
|
||||
temp = temp.upper()
|
||||
|
||||
responses[i] = temp
|
||||
temp = re.sub(r'^(<==>)', r'0 \1', temp)
|
||||
temp = re.sub(r'(?<!\n) +(?!\n)', ' ', temp) # remove double space
|
||||
|
||||
print("\nafter formatting\n")
|
||||
for i, r in enumerate(responses):
|
||||
print("\nresponse number " + str(i))
|
||||
print(r)
|
||||
temp = temp.split("\n")
|
||||
for k, t in enumerate(temp):
|
||||
if "<==>" not in temp[k]:
|
||||
temp[k] = "0 <==> " + temp[k]
|
||||
temp[k] = temp[k].split("<==>")
|
||||
temp[k] = [temp[k][0], *temp[k][1].split("//")]
|
||||
|
||||
chunk1arr = data.iloc[i]["chunked_sentance1"]
|
||||
q = 1
|
||||
numberList = []
|
||||
for chunk in chunk1arr:
|
||||
chunk = re.sub(r'(?<!\n) +(?!\n)', ' ', chunk)
|
||||
n_of_words = len(chunk.strip().split(" "))
|
||||
index_str = ""
|
||||
for qq in range(q, q + n_of_words):
|
||||
index_str = index_str + str(qq) + " "
|
||||
numberList.append(index_str)
|
||||
q = q + n_of_words
|
||||
|
||||
# write to file
|
||||
file_path = "student_fixed_format.txt"
|
||||
for j, chunk in enumerate(data.iloc[i]["chunked_sentance1"]):
|
||||
pattern = re.compile(chunk, re.IGNORECASE)
|
||||
temp[k][0] = pattern.sub(numberList[j], temp[k][0])
|
||||
|
||||
chunk1arr = data.iloc[i]["chunked_sentance2"]
|
||||
q = 1
|
||||
numberList = []
|
||||
for chunk in chunk1arr:
|
||||
chunk = re.sub(r'(?<!\n) +(?!\n)', ' ', chunk)
|
||||
n_of_words = len(chunk.strip().split(" "))
|
||||
index_str = ""
|
||||
for qq in range(q, q + n_of_words):
|
||||
index_str = index_str + str(qq) + " "
|
||||
numberList.append(index_str)
|
||||
q = q + n_of_words
|
||||
|
||||
for j, chunk in enumerate(data.iloc[i]["chunked_sentance2"]):
|
||||
pattern = re.compile(chunk, re.IGNORECASE)
|
||||
temp[k][1] = pattern.sub(numberList[j], temp[k][1])
|
||||
|
||||
if any(char.isalpha() and ord(char) < 128 for char in temp[k][0]) or any(char.isalpha() and ord(char) < 128 for char in temp[k][1]):
|
||||
temp[k] = ""
|
||||
if len(temp[k]) >= 4:
|
||||
temp[k][3] = temp[k][3].replace("NOALI", "0")
|
||||
if temp[k][3] == "":
|
||||
temp[k][3] = " 0 "
|
||||
if temp[k][3] == " ":
|
||||
temp[k][3] = " 0 "
|
||||
temp[k] = temp[k][0] + " <==> " + temp[k][1] + " // " + temp[k][2] + " // " + temp[k][3]
|
||||
elif len(temp[k]) == 3:
|
||||
temp[k] = temp[k][0] + " <==> " + temp[k][1] + " // " + temp[k][2] + " // 0"
|
||||
|
||||
temp[k] = re.sub(r'\s{2,}', ' ', temp[k]) # remove double space
|
||||
|
||||
temp = [x for x in temp if x != ""]
|
||||
responses[i] = "\n".join(temp).strip()
|
||||
|
||||
indexes = []
|
||||
responses_final = []
|
||||
rejected_indexes = []
|
||||
for n, r in enumerate(responses):
|
||||
if r == '':
|
||||
rejected_indexes.append(n+1)
|
||||
continue
|
||||
if r == '\n':
|
||||
rejected_indexes.append(n+1)
|
||||
continue
|
||||
indexes.append(n+1)
|
||||
responses_final.append(r)
|
||||
|
||||
print("rejected indexes:")
|
||||
print(rejected_indexes)
|
||||
|
||||
with open(file_path, 'w') as file:
|
||||
for i, r in enumerate(responses):
|
||||
for i, r in zip(indexes, responses_final):
|
||||
file.write("<sentence id=\"" + str(i+1) + "\" status=\"\">\n")
|
||||
file.write("<alignment>\n")
|
||||
file.write(r)
|
||||
file.write("</alignment>\n")
|
||||
file.write("</sentence>\n")
|
||||
file.write("\n</alignment>\n")
|
||||
file.write("</sentence>\n\n")
|
||||
101
format_alignments_with_training.py
Normal file
101
format_alignments_with_training.py
Normal file
@ -0,0 +1,101 @@
|
||||
"""Normalise generated chunk alignments for the student-answers set.

Loads the alignments stored in ``alignments_with_training_student_K.wa``,
rewrites the arrow variants to ``<==>``, replaces chunk text by a number and
writes one ``<sentence>`` element per response to
``student_fixed_format_with_training.txt``.

The number that replaces a chunk is its 1-based position in the list of that
sentence's chunks sorted by length, longest first.
"""
import pandas as pd
import processing
import re
import copy

# paths to the student answers database
studentAnswers1_path = "test_goldStandard/student/STSint.testinput.answers-students.sent1.txt"
studentAnswers2_path = "test_goldStandard/student/STSint.testinput.answers-students.sent2.txt"
studentAnsewrs_chunked_path1 = "test_goldStandard/student/STSint.testinput.answers-students.sent1.chunk.txt"
studentAnsewrs_chunked_path2 = "test_goldStandard/student/STSint.testinput.answers-students.sent2.chunk.txt"
studentsAnsewrs_alignment_path = "test_goldStandard/student/STSint.testinput.answers-students.wa"

# load data
# The second argument is the sentence-2 file; passing the sentence-1 path
# twice would load the same file for both sides.
studentAnserws = processing.load_sentences(studentAnswers1_path, studentAnswers2_path)
goldstandard_chunked = processing.load_chunked(studentAnsewrs_chunked_path1, studentAnsewrs_chunked_path2)
goldstandard_alignment = processing.load_alignment(studentsAnsewrs_alignment_path)

# one table with the chunk lists and the gold alignment, joined on the index
data = pd.merge(goldstandard_chunked, goldstandard_alignment, left_index=True, right_index=True)

file_path = "alignments_with_training_student_K.wa"

responses = processing.load_alignment(file_path)
responses = responses["alignment_text"].to_list()

for i, r in enumerate(responses):
    print("\nresponse number " + str(i))
    print(r)

# keep the raw text so it can be printed next to the formatted result
unformatted = copy.deepcopy(responses)

for i, response in enumerate(responses):
    temp = response.lstrip("\n")
    temp = temp.rstrip("\n")

    temp = re.sub(r'(?<==)>', '> ', temp) # add space after >
    temp = re.sub(r'(?<!\n) +(?!\n)', ' ', temp) # remove double space

    # unify every arrow variant to "<==>" and fill in missing sides with 0
    temp = temp.replace("]", "")
    temp = temp.replace("[", "")
    temp = temp.replace(" ==> ", " <==> ")
    temp = temp.replace(" => ", " <==> ")
    temp = temp.replace(" <=> ", " <==> ")
    temp = temp.replace(" <== ", " <==> ")
    temp = temp.replace("<==> //", " <==> 0 //")
    temp = temp.replace("<==> //", " <==> 0 //")
    temp = temp.replace("NOALI <==>", "0 <==>")
    temp = temp.replace("<==> NOALI", "<==> 0")
    temp = temp.replace("\n// NOALI", "\n0 <==> 0 // NOALI")
    temp = re.sub(r'(^|[^<])(==>+)', r' <==>', temp)

    temp = re.sub(r'^(<==>)', r'0 \1', temp)

    temp = re.sub(r'(?<!\n) +(?!\n)', ' ', temp) # remove double space

    # The chunk patterns only depend on the response, not on the line, so
    # they are built once here.  Longest chunks come first so that a short
    # chunk cannot replace part of a longer one.  re.escape makes the chunk
    # match as literal text: chunks such as "in the circuit." contain regex
    # metacharacters.
    patterns1 = [
        re.compile(re.escape(chunk), re.IGNORECASE)
        for chunk in sorted(data.iloc[i]["chunked_sentance1"], key=len, reverse=True)
    ]
    patterns2 = [
        re.compile(re.escape(chunk), re.IGNORECASE)
        for chunk in sorted(data.iloc[i]["chunked_sentance2"], key=len, reverse=True)
    ]

    lines = temp.split("\n")
    for k, line in enumerate(lines):
        sides = line.split("<==>")
        if len(sides) < 2:
            raise ValueError(f"response {i}, line {k}: no '<==>' in {line!r}")
        left = sides[0]
        fields = sides[1].split("//")

        for j, pattern in enumerate(patterns1):
            left = pattern.sub(str(j + 1), left)
        for j, pattern in enumerate(patterns2):
            fields[0] = pattern.sub(str(j + 1), fields[0])

        if len(fields) >= 3:
            line = left + " <==> " + fields[0] + " // " + fields[1] + " // " + fields[2]
        elif len(fields) == 2:
            # no score given: default it to 0
            line = left + " <==> " + fields[0] + " // " + fields[1] + " // 0"
        else:
            raise ValueError(f"response {i}, line {k}: no '//' field in {line!r}")
        lines[k] = re.sub(r'\s{2,}', ' ', line) # remove double space

    responses[i] = "\n".join(lines)


print("\nafter formatting\n")
for i, r in enumerate(responses):
    print("\nresponse number " + str(i))
    print("FORMATTED\n")
    print(r)
    print("\nUNFORMATTED\n")
    print(unformatted[i])


# write to file
file_path = "student_fixed_format_with_training.txt"

with open(file_path, 'w') as file:
    for i, r in enumerate(responses):
        file.write("<sentence id=\"" + str(i+1) + "\" status=\"\">\n")
        file.write("<alignment>\n")
        file.write(r)
        file.write("\n</alignment>\n")
        file.write("</sentence>\n\n")
|
||||
@ -2,6 +2,8 @@
|
||||
Get chunk alignment from chatGPT
|
||||
"""
|
||||
from openai import OpenAI
|
||||
import pandas as pd
|
||||
import processing
|
||||
|
||||
thePrompt = """You are a machine designed to align chunks from 2 sentences. This means you will be taking each chunk from one sentence and comparing it to every chunk from the other sentence. Choose the chunks with the strongest relation and assign them a score.
|
||||
|
||||
@ -18,12 +20,37 @@ The possible scores are a range from 0 to 5 where 0 means that the chunks are no
|
||||
|
||||
A chunk can be aligned to multiple different chunks. Chunks can also be grouped in a relation. If a chunk has no relation to any other chunk, give it a NOALI relation to a an empty chunk.
|
||||
|
||||
If there are spelling mistakes do not correct them.
|
||||
|
||||
Present the answers in this form:
|
||||
chunk from first sentence <==>chunk from the second sentence // alignment type // score
|
||||
chunk from first sentence <==> chunk from the second sentence // alignment type // score
|
||||
"""
|
||||
|
||||
thePrompt2 = """You are a machine designed to align chunks from 2 sentences. This means you will be taking each chunk from one sentence and comparing it to every chunk from the other sentence. Choose the chunks with the strongest relation and assign them a relation and a score.
|
||||
|
||||
The possible relations are:
|
||||
EQUI: both chunks have the same meaning
|
||||
OPPO: the meanings of the chunks are opposite
|
||||
SPE1: both chunks have similar meanings, but chunk in sentence 1 is more specific.
|
||||
SPE2: both chunks have similar meanings, but chunk in sentence 2 is more specific.
|
||||
SIMI: both chunks have similar meanings
|
||||
REL: both chunks are not considered similar, but they are closely related by some relation not mentioned above
|
||||
NOALI: this chunk has not any corresponding chunk in the other sentence.
|
||||
|
||||
The possible scores are a range from 0 to 5 where 0 means that the chunks are not related and 5 means their meanings are the same in the given context.
|
||||
|
||||
A chunk can be aligned with multiple different chunks. Chunks can also be grouped in a relation. If a chunk has no relation to any other chunk, give it a NOALI relation to a chunk "0".
|
||||
|
||||
If there are spelling mistakes, do not correct them.
|
||||
|
||||
Present the answers in this form:
|
||||
chunk from first sentence <==> chunk from the second sentence // relation // score // comment
|
||||
|
||||
in the comment you can explain your choice."""
|
||||
|
||||
|
||||
def createGPT() -> OpenAI:
    """Create the OpenAI client used for the alignment requests.

    The key passed here is a placeholder string and has to be replaced with a
    real API key before use.
    """
    client = OpenAI(api_key="Your API key here")
    return client
|
||||
|
||||
|
||||
@ -36,3 +63,32 @@ def callApi(client:OpenAI, chunks:str):
|
||||
]
|
||||
)
|
||||
return response.choices[0].message.content.strip()
|
||||
|
||||
|
||||
def callApi_examples(client:OpenAI, examples:pd.DataFrame, alignment:str):
    """Request an alignment from the chat model, with solved examples as context.

    The conversation starts with the instruction prompt.  Every row of
    *examples* then adds one user/assistant exchange: the user message lists
    the chunks of sentence 1 and, after a newline, the chunks of sentence 2,
    each written as ``[ chunk ]``; the assistant message is the row's
    ``alignment_text``.  *alignment* is sent as the final user message.

    Returns the stripped content of the first completion choice.
    """
    conversation = [{"role": "user", "content": thePrompt}]
    for _, example in examples.iterrows():
        first = "".join("[ " + piece + " ] " for piece in example["chunked_sentance1"])
        second = "".join("[ " + piece + " ] " for piece in example["chunked_sentance2"])
        conversation.append({"role": "user", "content": first + "\n" + second})
        conversation.append({"role": "assistant", "content": example["alignment_text"]})
    conversation.append({"role": "user", "content": alignment})

    reply = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=conversation
    )
    return reply.choices[0].message.content.strip()
|
||||
|
||||
3198
headlines_fixed_format.wa
Normal file
3198
headlines_fixed_format.wa
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,80 +0,0 @@
|
||||
Given the input (pairs of sentences divided into chunks) align the corresponding chunks. The chunks are based on those used in the CoNLL 2000 chunking task (Abney 1991, Tjong et al. 2000), with some adaptations.
|
||||
|
||||
The steps required for this are as follows:
|
||||
|
||||
A. When aligning, take into account the deep meaning of the chunk in context, beyond the surface.
|
||||
B. One chunk can be aligned to more than one chunk, but only to prevent unaligned chunks.
|
||||
C. Do all 1:1 alignments first. When having two options to align, choose the strongest corresponding one first. A 1:1 alignment is comparing each chunk from one sentence to each chunk from the other sentence.
|
||||
D. After doing 1:1 alignments, check unaligned chunks. There are three options to align them, in this order of preference:
|
||||
1. Insert the unaligned chunk (or group of chunks) into an existing 1:1 alignment.
|
||||
2. Create a new relation, add a new score and label to the new relation.
|
||||
3. Chunks can be left unaligned if no corresponding chunk can be found. They are then assigned a NOALI label with score 0 with a relation to a non existing chunk 0
|
||||
E. Assign at least one label to each alignment.
|
||||
F. Try to leave as few unaligned chunks as possible.
|
||||
G. Keep it simple
|
||||
H. You can leave punctuations unaligned, as they will be ignored when evaluating. The interface requires that you annotate all tokens, so please tag them with the label for unaligned chunks
|
||||
|
||||
The scores are defined as follows:
|
||||
A similarity/relatedness score between the aligned chunks, from 5 (maximum similarity/relatedness) to 0 (no relation at all):
|
||||
5 if the meaning of both chunks is equivalent
|
||||
[4,3] iff the meaning of both chunks is very similar or closely related
|
||||
[2,1] iff the meaning of both chunks is slightly similar or somehow related
|
||||
0 iff the meaning of both chunks is completely unrelated.
|
||||
|
||||
What is more, there are different possible types of alignment:
|
||||
EQUI: both chunks have the same meaning, they are semantically equivalent in this context.
|
||||
OPPO: the meanings of the chunks are in opposition to each other, lying in an inherently incompatible binary relationship.
|
||||
SPE1: both chunks have similar meanings, but chunk in sentence 1 is more specific.
|
||||
SPE2: like SPE1, but it is the chunk in sentence 2 which is more specific.
|
||||
SIMI: both chunks have similar meanings, they share similar attributes and there is no EQUI, OPPO, SPE1 or SPE2 relation
|
||||
REL: both chunks are not considered similar but they are closely related by some relation not mentioned above (i.e. no EQUI, OPPO, SPE1, SPE2, or SIMI relation).
|
||||
NOALI: this chunk has not any corresponding chunk in the other sentence. Therefore, it is left unaligned.
|
||||
|
||||
Scores for NOALI will be ignored. EQUI should have a 5 score. The rest should have a score bigger than 0 but lower than 5.
|
||||
|
||||
the data will be provided in this format:
|
||||
seq1:
|
||||
1) sequance 1 chunk 1
|
||||
2) sequance 1 chunk 2
|
||||
3) sequance 1 chunk 3
|
||||
...
|
||||
|
||||
seq2:
|
||||
1) sequance 2 chunk 1
|
||||
2) sequance 2 chunk 2
|
||||
3) sequance 2 chunk 3
|
||||
...
|
||||
|
||||
where seq1: and seq2: indicate the beginning of a new chunk sequence. Each chunk begins with a number that indicates the token-id.
|
||||
|
||||
|
||||
Each alignment is reported in one line as follows:
|
||||
token-id-seq1 <==> token-id-seq2 // type // score // comment
|
||||
|
||||
where:
|
||||
token-id-seq1 is a sequence of token indices (starting at 1) for the chunk(s) in sentence 1 (or 0 if the chunk in sentence 2 is not aligned)
|
||||
token-id-seq2 is a sequence of token indices (starting at 1) for the chunk(s) in sentence 2 (or 0 if the chunk in sentence 1 is not aligned)
|
||||
type is composed of one of the obligatory labels, concatenated to the optional ones by '_'
|
||||
score is a number from 0 to 5, or NIL (if type label is NOALI)
|
||||
comment is chunks written in their text form and an explanation of the relation
|
||||
|
||||
Using the terminology from the input data example, if a chunk has a single relation, for example sequance 1 chunk 1 with sequance 2 chunk 1, report it as:
|
||||
1 <==> 1 // type // score // comment
|
||||
|
||||
If there are multiple chunks in relation with each other, separate them with spaces. For example:
|
||||
1 <==> 1 2 // type // score // comment
|
||||
or
|
||||
2 3 <==> 1 2 // type // score // comment
|
||||
|
||||
provide the answer for final chunk relations as a plain text list of relations.
|
||||
|
||||
seq1:
|
||||
1) both bulbs a and c
|
||||
2) still
|
||||
3) have a closed path
|
||||
|
||||
seq2:
|
||||
1) Bulbs A and C
|
||||
2) are
|
||||
3) still
|
||||
4) in closed paths
|
||||
@ -120,14 +120,15 @@ def test_XML():
|
||||
# test out the format
|
||||
print(alignments_data[0]["alignment_text"])
|
||||
|
||||
def generate_train_test_split(xy: pd.DataFrame):
    """
    Shuffles the given dataframe and splits it into a train and a test part.

    The rows are shuffled with a fixed seed (random_state=42), so calling this
    again on the same dataframe gives the same split. The first 3% of the
    shuffled rows (rounded down) form the train part, all remaining rows the
    test part.

    Returns a (train, test) pair of dataframes.
    """
    shuffled = xy.sample(frac=1, random_state=42)
    n_train = int(.03*len(shuffled))

    # Positional slicing instead of np.split: np.split on a DataFrame goes
    # through the deprecated DataFrame.swapaxes and emits a FutureWarning.
    train = shuffled.iloc[:n_train]
    test = shuffled.iloc[n_train:]

    return train, test
|
||||
|
||||
def generate_alignment_format(dataFrame:pd.DataFrame, id:int) -> str:
|
||||
output = "seq1:\n"
|
||||
@ -150,4 +151,4 @@ def get_chunks_as_text(data:pd.DataFrame) -> str:
|
||||
for chunk in row["chunked_sentance2"]:
|
||||
chunks = chunks + "[ " + chunk + " ] "
|
||||
output.append(chunks)
|
||||
return output
|
||||
return output, data.index
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
2915
student_fixed_format.wa
Normal file
2915
student_fixed_format.wa
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user