PhD

The LaTeX sources of my Ph.D. thesis
git clone https://esimon.eu/repos/PhD.git

commit 94bc74b6ce08b0e8b2028f81a5bd9010d9676d03
Author: Étienne Simon <esimon@esimon.eu>
Date:   Wed, 18 May 2022 03:24:56 +0000

Reviewers' version

Diffstat:
ALICENSE | 427+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AREADME | 8++++++++
Abackmatter/assumptions/appendix.tex | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abackmatter/colophon.tex | 23+++++++++++++++++++++++
Abackmatter/conclusion.tex | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abackmatter/datasets/appendix.tex | 210+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abackmatter/datasets/fewrel.tex | 9+++++++++
Abackmatter/datasets/freebase.tex | 9+++++++++
Abackmatter/datasets/semeval.tex | 9+++++++++
Abackmatter/datasets/trex.tex | 10++++++++++
Abackmatter/datasets/wikidata.tex | 41+++++++++++++++++++++++++++++++++++++++++
Abackmatter/french/abstract.tex | 12++++++++++++
Abackmatter/french/appendix.tex | 28++++++++++++++++++++++++++++
Abackmatter/french/conclusion.tex | 11+++++++++++
Abackmatter/french/context.tex | 177+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abackmatter/french/fitb quantitative.tex | 12++++++++++++
Abackmatter/french/fitb.tex | 65+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abackmatter/french/graph quantitative.tex | 9+++++++++
Abackmatter/french/graph.tex | 109+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abackmatter/french/introduction.tex | 35+++++++++++++++++++++++++++++++++++
Abackmatter/french/title.tex | 18++++++++++++++++++
Afrench summary.tex | 18++++++++++++++++++
Afrontmatter/Cheshire Cat.png | 0
Afrontmatter/OuCuiPo.jpg | 0
Afrontmatter/Paris Quadrifolia.jpg | 0
Afrontmatter/Ship of Theseus.jpg | 0
Afrontmatter/abbreviations.tex | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Afrontmatter/abstract.tex | 15+++++++++++++++
Afrontmatter/acknowledgements.tex | 11+++++++++++
Afrontmatter/gavagai 1.tex | 1+
Afrontmatter/gavagai 2.tex | 1+
Afrontmatter/introduction.tex | 255+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afrontmatter/notation.tex | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afrontmatter/thèse.tex | 29+++++++++++++++++++++++++++++
Afrontmatter/title.tex | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alatexmkrc | 26++++++++++++++++++++++++++
Alib/distribution output.def | 14++++++++++++++
Alib/draft version.lua | 39+++++++++++++++++++++++++++++++++++++++
Alib/layout.lua | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/memory network.def | 23+++++++++++++++++++++++
Alib/moved marginpar.lua | 19+++++++++++++++++++
Alib/plate diagram.def | 7+++++++
Alib/render.lua | 206+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/terminal color.lua | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/context/attention.tex | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/context/bert.tex | 43+++++++++++++++++++++++++++++++++++++++++++
Amainmatter/context/bpe.tex | 16++++++++++++++++
Amainmatter/context/chapter.tex | 26++++++++++++++++++++++++++
Amainmatter/context/cnn.tex | 34++++++++++++++++++++++++++++++++++
Amainmatter/context/conclusion.tex | 32++++++++++++++++++++++++++++++++
Amainmatter/context/fact.tex | 11+++++++++++
Amainmatter/context/history.tex | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/context/introduction.tex | 33+++++++++++++++++++++++++++++++++
Amainmatter/context/knowledge base.tex | 295+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/context/lstm.tex | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/context/memory network lm.tex | 19+++++++++++++++++++
Amainmatter/context/relation properties.tex | 12++++++++++++
Amainmatter/context/rnn lm.tex | 21+++++++++++++++++++++
Amainmatter/context/sentence.tex | 341+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/context/sparql.tex | 7+++++++
Amainmatter/context/transe.tex | 28++++++++++++++++++++++++++++
Amainmatter/context/word.tex | 183+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/context/word2vec embeddings.xml | 23+++++++++++++++++++++++
Amainmatter/fitb/align.tex | 12++++++++++++
Amainmatter/fitb/chapter.tex | 20++++++++++++++++++++
Amainmatter/fitb/conclusion.tex | 17+++++++++++++++++
Amainmatter/fitb/confusion lda.xml | 2++
Amainmatter/fitb/confusion pcnn.xml | 2++
Amainmatter/fitb/confusion regularized vae.xml | 2++
Amainmatter/fitb/confusion vae.xml | 2++
Amainmatter/fitb/experiments.tex | 157+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/fitb/fitb split.tex | 23+++++++++++++++++++++++
Amainmatter/fitb/gumbel.tex | 12++++++++++++
Amainmatter/fitb/introduction.tex | 43+++++++++++++++++++++++++++++++++++++++++++
Amainmatter/fitb/model.tex | 232+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/fitb/problem 1.tex | 15+++++++++++++++
Amainmatter/fitb/problem 2.tex | 17+++++++++++++++++
Amainmatter/fitb/quantitative.tex | 34++++++++++++++++++++++++++++++++++
Amainmatter/fitb/related works.tex | 48++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/fitb/variants.tex | 108+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/graph/3-path.tex | 9+++++++++
Amainmatter/graph/T-REx degrees.xml | 4168+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/graph/Weisfeiler-Leman.tex | 23+++++++++++++++++++++++
Amainmatter/graph/analysis.tex | 216+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/graph/approach.tex | 242+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/graph/biclique.tex | 25+++++++++++++++++++++++++
Amainmatter/graph/chapter.tex | 26++++++++++++++++++++++++++
Amainmatter/graph/chebyshev.tex | 17+++++++++++++++++
Amainmatter/graph/conclusion.tex | 11+++++++++++
Amainmatter/graph/encoding.tex | 139+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/graph/experiments.tex | 43+++++++++++++++++++++++++++++++++++++++++++
Amainmatter/graph/graph convolution parallel.tex | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/graph/introduction.tex | 37+++++++++++++++++++++++++++++++++++++
Amainmatter/graph/isomorphism.tex | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/graph/line graph.tex | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/graph/nell bipartite.tex | 33+++++++++++++++++++++++++++++++++
Amainmatter/graph/path counting.tex | 39+++++++++++++++++++++++++++++++++++++++
Amainmatter/graph/path graph.tex | 9+++++++++
Amainmatter/graph/paths frequencies.tex | 16++++++++++++++++
Amainmatter/graph/quantitative.tex | 12++++++++++++
Amainmatter/graph/related work.tex | 498+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/graph/samples example.tex | 38++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/aggregate.tex | 277+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/bootstrap algorithm.tex | 16++++++++++++++++
Amainmatter/relation extraction/chapter.tex | 22++++++++++++++++++++++
Amainmatter/relation extraction/clustering metrics.tex | 44++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/conclusion.tex | 21+++++++++++++++++++++
Amainmatter/relation extraction/definition.tex | 239+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/dependency tree.tex | 38++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/dipre split.tex | 18++++++++++++++++++
Amainmatter/relation extraction/emes.tex | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/entity pair graph.tex | 9+++++++++
Amainmatter/relation extraction/epgnn sentence representation.tex | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/few-shot problem.tex | 13+++++++++++++
Amainmatter/relation extraction/ie steps.tex | 27+++++++++++++++++++++++++++
Amainmatter/relation extraction/introduction.tex | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/label propagation.tex | 25+++++++++++++++++++++++++
Amainmatter/relation extraction/marcheggiani plate.tex | 16++++++++++++++++
Amainmatter/relation extraction/miml setup.tex | 18++++++++++++++++++
Amainmatter/relation extraction/multir plate.tex | 20++++++++++++++++++++
Amainmatter/relation extraction/multir.tex | 18++++++++++++++++++
Amainmatter/relation extraction/pcnn.tex | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/pullback.tex | 10++++++++++
Amainmatter/relation extraction/rellda plate.tex | 33+++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/rellda.tex | 23+++++++++++++++++++++++
Amainmatter/relation extraction/selfore.tex | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/sentential.tex | 388+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/supervised metrics.tex | 25+++++++++++++++++++++++++
Amainmatter/relation extraction/supervised samples.tex | 9+++++++++
Amainmatter/relation extraction/supervised.tex | 0
Amainmatter/relation extraction/supervision.tex | 137+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/syntactic parse tree.tex | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/universal schema.tex | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/unsupervised.tex | 641+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amainmatter/relation extraction/vae plate.tex | 17+++++++++++++++++
Amainmatter/relation extraction/zipf.tex | 7+++++++
Athesis.bib | 2343+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Athesis.cls | 788+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Athesis.sty | 390+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Athesis.tex | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
140 files changed, 16441 insertions(+), 0 deletions(-)

diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,427 @@
+Attribution-ShareAlike 4.0 International
+
+[full text of the Creative Commons Attribution-ShareAlike 4.0 International Public License]
diff --git a/README b/README
@@ -0,0 +1,8 @@
+To compile this thesis, run:
+$ latexmk thesis.tex
+
+This was only tested using TeX Live 2021 and might need some work to compile with future versions.
+
+Compiled versions are available at https://esimon.eu/PhD
+
+The sources are not perfect but I had to graduate at some point. :)
diff --git a/backmatter/assumptions/appendix.tex b/backmatter/assumptions/appendix.tex
@@ -0,0 +1,74 @@
+\chapter{List of Assumptions}
+\label{chap:assumptions}
+Modeling hypotheses are central to relation extraction approaches, especially unsupervised ones (see Chapter~\ref{chap:relation extraction}).
+This appendix lists all assumptions introduced in the previous chapters in alphabetical order, with a reference to the section in which each was introduced and, whenever possible, a counterexample exposing what kind of construct cannot be captured by making this hypothesis.
+
+\typeassumption[\(1\to1\)]{onetoone}%
+Appeared Section~\refAssumptionSection{onetoone}.\\
+Counterexample: ``Josetsu \textsl{born in} Kyushu'' and ``Minamoto no Shunrai \textsl{born in} Kyushu.''
+
+\bigskip
+
+\typeassumption[1-adjacency]{oneadjacency}%
+Appeared Section~\refAssumptionSection{oneadjacency}.\\
+Counterexample: ``Khayyam \textsl{born in} Nishapur'' and ``Khayyam \textsl{died in} Nishapur.''
+
+\bigskip
+
+\typeassumption[1-neighborhood]{oneneighborhood}%
+Appeared Section~\refAssumptionSection{oneneighborhood}.\\
+Counterexample: \textsl{born in} and \textsl{died in}.
+Since the arc-neighborhood \(\gfeneighbors\) is split between in- and out-neighborhoods, this hypothesis is close to \hypothesis{type}.
+The main difference is that the partitions (types) of \hypothesis{type} can't overlap.
+While a relation which can have any type as a subject can't be modeled under the \hypothesis{type} hypothesis, it will simply correspond to a distribution with mass on all entities in the \hypothesis{1-neighborhood} assumption.
+
+\bigskip
+
+\typeassumption[biclique]{biclique}
+Appeared Section~\refAssumptionSection{biclique}.\\
+Counterexample: most relations should infringe this assumption since a relation satisfying it is decomposable into two unary predicates: whether the entity is part of \(A\) and whether it is part of \(B\).
+For example, ``Alonzo Church \textsl{died in} Hudson'' and ``Alan Turing \textsl{died in} Wilmslow'' are true but ``Alonzo Church \textsl{died in} Wilmslow'' is false.
+
+\bigskip
+
+\typeassumption[blankable]{blankable}
+Appeared Section~\refAssumptionSection{blankable}.\\
+Counterexample: some surface forms are mapped to different relations depending on the nature of the entities; in FewRel, ``\uhead{~?~} is part of \utail{~?~}'' can convey both \textsl{part of} and \textsl{part of constellation}.
+
+\bigskip
+
+\typeassumption[\ctxoneadj]{ctxoneadjacency}
+Appeared Section~\refAssumptionSection{ctxoneadjacency}.\\
+Finding a counterexample for this assumption is quite difficult since it depends on the operation performed by the contextualization function \(\operatorname{ctx}\).
+In this sense, it is a weak assumption.
+
+\bigskip
+
+\typeassumption[distant]{distant}
+Appeared Section~\refAssumptionSection{distant}.\\
+Counterexample: ``Chekhov found himself coughing blood, and in 1886 the attacks worsened, but he would not admit his tuberculosis to his family or his friends.'' does not convey the fact ``Anton Chekhov \textsl{cause of death} Tuberculosis''; it only conveys ``Anton Chekhov \textsl{has medical condition} Tuberculosis.''
+
+\bigskip
+
+\typeassumption[multi-instance]{multiinstance}
+Appeared Section~\refAssumptionSection{multiinstance}.\\
+Counterexample: even though ``Josetsu \textsl{born in} Kyushu'' is present in Wikidata, at the time of writing, this information is missing from its English Wikipedia page; thus, an alignment of \(\dataSet=\text{Wikipedia}\) with \(\kbSet=\text{Wikidata}\) would not verify \hypothesis{multi-instance}.
+
+\bigskip
+
+\typeassumption[pullback]{pullback}
+Appeared Section~\refAssumptionSection{pullback}.\\
+Entails \hypothesis{1-adjacency}.\\
+Counterexample: unless the reader is familiar with biographies of early Chinese philosophers, the relation between \wdent{1362266} ``Gongsun Long'' and \wdent{197430} ``Zhao'' should not be immediately obvious.
+
+\bigskip
+
+\typeassumption[type]{type}
+Appeared Section~\refAssumptionSection{type}.\\
+Counterexample: ``Deneb \textsl{part of} Summer Triangle'' (type pair: star--con\-stel\-la\-tion) and ``Mitochondrion \textsl{part of} Cytoplasm'' (type pair: organelle--cellular component).
+
+\bigskip
+
+\typeassumption[uniform]{uniform}
+Appeared Section~\refAssumptionSection{uniform}.\\
+Counterexample: the relation ``\textsl{worshipped by}'' generally appears far less often than ``\textsl{place of burial},'' whether measured through the number of facts in Wikidata or the number of sentences conveying these relations in Wikipedia.
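The counterexamples above lend themselves to a quick mechanical check. The short Python sketch below is an editorial illustration rather than part of this commit: it encodes two of the counterexamples as toy fact sets and tests them against one plausible informal reading of the 1-to-1 and 1-adjacency hypotheses. The function names and the exact predicates they implement are assumptions of this sketch, not definitions taken from the thesis.

from collections import defaultdict

# Toy facts (subject, relation, object), taken from the counterexamples above.
facts_one_to_one = [
    ("Josetsu", "born in", "Kyushu"),
    ("Minamoto no Shunrai", "born in", "Kyushu"),
]
facts_one_adjacency = [
    ("Khayyam", "born in", "Nishapur"),
    ("Khayyam", "died in", "Nishapur"),
]

def satisfies_one_to_one(facts):
    # Informal reading of the 1-to-1 hypothesis: for a given relation,
    # no two facts share a subject or share an object.
    subjects, objects = defaultdict(set), defaultdict(set)
    for s, r, o in facts:
        if s in subjects[r] or o in objects[r]:
            return False
        subjects[r].add(s)
        objects[r].add(o)
    return True

def satisfies_one_adjacency(facts):
    # Informal reading of the 1-adjacency hypothesis: an entity pair
    # is linked by at most one relation.
    relations = defaultdict(set)
    for s, r, o in facts:
        relations[(s, o)].add(r)
    return all(len(rs) <= 1 for rs in relations.values())

print(satisfies_one_to_one(facts_one_to_one))        # False: Kyushu is the object of "born in" twice
print(satisfies_one_adjacency(facts_one_adjacency))  # False: (Khayyam, Nishapur) is linked by two relations

Both checks return False, matching the intent of the counterexamples: the first toy fact set violates the 1-to-1 reading because two subjects share the same object for the same relation, and the second violates the 1-adjacency reading because the same entity pair is linked by two different relations.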
diff --git a/backmatter/colophon.tex b/backmatter/colophon.tex
@@ -0,0 +1,23 @@
+\markboth{}{}%
+\clearpage%
+\null%
+\vfill%
+\pdfbookmark[chapter]{Colophon}{colophon}
+\noindent\textsc{Colophon}
+
+\medskip
+\noindent
+This document is written in Lua\LaTeX{} using \textsc{pgf}/Ti\emph{k}Z and \textsc{pgfplots} for figures.
+Most of the text and math are typeset in Latin Modern, while \textsc{eb} Garamond is used for titles.
+A small number of characters come from the \TeX{} Gyre Bonum and \textsc{xits} fonts.
+Greek words are typeset in the Greek Font Society's Didot Classic, while Chinese excerpts are in the \textsc{i}.Ming font.
+Finally, the word ``\textsc{thèse}'' on the title page comes from a vectorization of Auguste Boulanger's Ph.D.\ theses (\cite*{these_boulanger}).
+
+\bigskip
+\noindent
+The manuscript and sources are freely available at \url{https://esimon.eu/PhD}.
+
+\medskip
+\noindent
+\ccbysa\ This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 International License.
+To view a copy of this license, visit \url{http://creativecommons.org/licenses/by-sa/4.0/}.
diff --git a/backmatter/conclusion.tex b/backmatter/conclusion.tex
@@ -0,0 +1,117 @@
+\chapter{Conclusion}
+\label{chap:conclusion}
+During this Ph.D.\ candidacy, I---mostly%
+\sidenote{
+    With the occasional---and deeply appreciated---distraction of Syrielle Montariol on unrelated \textsc{nlp} projects \parencite{mmsrl}.
+}%
+---focused on the study of unsupervised relation extraction.
+In this task, given a set of tagged sentences and pairs of entities, we seek the set of conveyed facts \((e_1, r, e_2)\), such that \(r\) embodies the relationship between \(e_1\) and \(e_2\) expressed in some sample.
+To tackle this task, we follow two main axes of research: first, the question of how to train a deep neural network for unsupervised relation extraction; second, the question of how to leverage the structure of an unsupervised dataset to gain additional information for the relation extraction task.
+
+\section*{Summary of Contributions}
+For more than a decade now, the field of machine learning has been overrun by deep learning approaches.
+Since I started working on unsupervised relation extraction in late 2017, the task has followed the same fate.
+The \textsc{vae} model of \textcitex{vae_re} started introducing deep learning methods to the task.
+However, it was still limited by a sentence representation based on hand-engineered features.
+My first axis of research was to partake in this deep learning transition (Chapter~\ref{chap:fitb}).
+Subsequently, the use of deep learning was made simpler with the replacement of \textsc{cnn} and \textsc{lstm}-based models with pre-trained transformers.
+Indeed, a model like \textsc{bert} \parencite{bert} performs reasonably well on unsupervised relation extraction ``out of the box.''
+This was exploited by others, in the clustering setup by Self\textsc{ore} \parencitex{selfore}, and in the few-shot setup by \textsc{mtb} \parencitex{mtb}.
+My second axis of research was to exploit the regularities of the dataset to leverage additional information from its structure (Chapter~\ref{chap:graph}).
+While some works already used this information in supervised relation extraction \parencite{label_propagation_re, epgnn}, unsupervised models made no attempt at modeling it explicitly.
+Our proposed approaches are based on a graph representation of the dataset.
+As we have shown, they are part of a general revival of graph-based approaches in deep learning \parencite{gcn_spectral_semi, graphsage}.
+We now describe the three main contributions we can draw from our work.
+
+\paragraph{Literature review with formalized modeling assumptions.}\leavevmode\null\\
+In Chapter~\ref{chap:relation extraction}, we presented relevant relation extraction models from the late 1990s until today.
+We first introduced supervised approaches, which we split into two main blocks:
+\begin{description}[font=\mdseries\itshape]
+    \item[Sentential methods] extract a relation for each sample in isolation.
+    In this setup, there is no difference between evaluating a model on a single dataset with a thousand samples or a thousand datasets containing one sample each.
+    Indeed, these models do not model the interactions between samples.
+    \item[Aggregate methods] map a set of unsupervised samples to a set of facts at once.
+    There is not necessarily a direct correspondence between extracted facts and samples in the dataset, even though most aggregate models still provide a sentential prediction.
+    In this setup, a dataset containing a single sentence would be meaningless; it would boil down to a sentential approach.
+\end{description}
+This distinction can also be made for unsupervised models, and indeed Chapter~\ref{chap:fitb} follows mostly a sentential approach, whereas Chapter~\ref{chap:graph} purposes to introduce the aggregate approach to the unsupervised setting.
+
+In Chapter~\ref{chap:relation extraction}, we also presented unsupervised relation extraction models.
+Unsupervised models need to rely on modeling hypotheses to capture the notion of relation.
+\begin{marginparagraph}
+    As a reminder, the modeling hypotheses are listed in Appendix~\ref{chap:assumptions}.
+\end{marginparagraph}
+While these hypotheses are not always clearly stated in articles, they are central to the design of unsupervised approaches.
+For our review, we decided to exhibit the key modeling hypotheses of relevant models.
+Formalizing these hypotheses allows us to have a clear understanding of what kind of relations cannot be modeled by a given model.
+Furthermore, it simplifies the usually challenging task of designing an unsupervised relation extraction loss.
+
+\paragraph{Regularizing discriminative approaches for deep encoders.}\leavevmode\null\\
+In Chapter~\ref{chap:fitb}, we introduced the first unsupervised model that does not rely on hand-engineered features.
+In particular, we identified two critical weaknesses of previous discriminative models which hindered the use of deep neural networks.
+These weaknesses relate to the model's output, which tends to collapse to a trivial---either deterministic or uniform---distribution.
+We introduced two relation distribution losses to alleviate these problems: a skewness loss pushes the prediction away from a uniform distribution, and a distribution distance loss prevents the output from collapsing to a deterministic distribution.
+This allowed us to train a \textsc{pcnn} model to cluster unsupervised samples into clusters conveying the same relation.
+
+\paragraph{Exploiting the dataset structure using graph-based models.}\leavevmode\null\\
+In Chapter~\ref{chap:graph}, we investigated aggregate approaches for unsupervised relation extraction.
+We encoded the relation extraction problem as a graph labeling---or attributing---problem.
+We then showed that information can be leveraged from this structure by probing distributional regularities of random paths.
+To exploit this information, we used our experience from Chapter~\ref{chap:relation extraction} to design an assumption that leverages the structure of the graph to supervise a relation extraction model.
+We then proposed an approach based on this hypothesis by modifying the Weisfeiler--Leman isomorphism test to use a 1-Wasserstein distance.
+
+\bigskip
+
+From a higher vantage point, we can say that we first took part in the development of deep learning approaches for the task of unsupervised relation extraction, and then helped open a new direction of research on aggregate approaches in the unsupervised setup using graph-based models.
+Both of these research directions were somewhat natural developments following current trends in machine learning research.
+
+\section*{Perspectives}
+
+\paragraph{Using language modeling for relation extraction.}
+A recent trend in \textsc{nlp} has been to encode all tasks as language models.
+The main embodiment of this trend is T5 \parencitex{t5}.
+\begin{marginparagraph}
+    The name T5 comes from ``Text-To-Text Transfer Transformer'' since it recasts every \textsc{nlp} task as a text-to-text problem.
+\end{marginparagraph}
+T5 is trained as a masked language model (\textsc{mlm}, Section~\ref{sec:context:mlm}) on a sizeable ``common crawl'' of the web.
+Then, it is fine-tuned by prefixing the sequence with a task-specific prompt such as ``translate English to German:''.
+Relation extraction can also be trained as a text-to-text model in the supervised setup \parencite{text_to_text_re}.
+Extending this model to the unsupervised setup---for example, through the creation of pseudo-labels---could allow us to leverage the large amount of linguistic information contained in the T5 parameters.
+In the same vein, \textcitex{lm_rp} propose to use predefined and learned prompts for relation prediction, for example by filling in the following template: ``Today, I finally discovered the relation between \(e_1\) and \(e_2\): \(e_1\) is the \blanktag{} of \(e_2\).''
+
+More generally, relation extraction is closely related to language models.
+The first model we experimented on during this Ph.D.\ candidacy was a pre-trained language model used to fill sentences such as ``The capital of Japan is \blanktag.''
+While \textcitex{transformers} was already published at the time, pre-trained transformer language models were not widely available yet.
+We used a basic \textsc{lstm}, which was strongly biased in favor of entities often appearing in the dataset.
+In practice, the model predicted ``London'' as the capital of most small countries.
+However, as we showcased in Section~\ref{sec:relation extraction:mtb}, large transformer-based models such as \textsc{bert} \parencite{bert} perform well out-of-the-box on unsupervised relation extraction.
+An additional argument in favor of transformer-based language models comes from Chapter~\ref{chap:fitb}.
+Indeed, the \emph{fill-in-the-blank} model seeks to predict an entity blanked in the input; this is similar to the \textsc{mlm} task.
+More abstractly, language purposes to describe a reality which can be understood---among other things---through the concept of relation.
+And indeed, if one understands language, one must understand the relations conveyed by language.
+Using a model of language as a basis for a model of relations is promising, as long as the semantic fragment of language unrelated to relations can be discarded.
+
+\paragraph{Dataset-level modeling hypotheses.}
+In the past few years, graph-based approaches have gained traction in the information extraction field \parencite{graphie, graphrel}\sidecite{graphie}, and we can only expect this interest to continue growing in the future.
+While knowledge of the language should be sufficient to understand the relation underlying most samples, it is challenging to design an unsupervised loss solely relying on linguistic information.
+Furthermore, following distributional linguistics, language---and thus the relations conveyed by language---is acquired through structured repetitions.
+The concept of repetition captured by graph adjacency can therefore also provide a theoretical basis for the design of modeling hypotheses.
+We can even argue that capturing the structure of the data is an ontologically prior modeling level.
+For this reason, we think that relation graphs should provide a better basis for the formulation of modeling hypotheses.
+
+\paragraph{Complex relations.}
+Several simplifying assumptions were made to define the relation extraction task.
+For example, we assume all relations to be binary, holding between exactly two entities.
+However, \(n\)-ary relations are needed to model complex interrelationships.
+For example, encoding the fact that ``a drug \(e_1\) can be used to treat a disease \(e_2\) when the patient has genetic mutation \(e_3\)'' necessitates a ternary relation.
+This problem has been tackled for a long time \parencite{n-ary_old, n-ary_recent}.
+The graph-based approaches have a natural extension to \(n\)-ary relations in the form of hypergraphs, which are graphs with \(n\)-ary edges.
+Since the hypergraph isomorphism problem can be polynomially reduced to the standard graph isomorphism problem \parencite{gicomplete}, we can expect \(n\)-ary extensions of graph-based relation extraction approaches to work as well as standard relation extraction.
+
+A related problem is that of fact qualification.
+The fact ``Versailles \textsl{capital of} France'' only held until the 1789 revolution.
+In Wikidata parlance, these kinds of details are called \emph{qualifiers}.
+In particular, the temporal qualification can be critical to certain relation extraction datasets \parencite{time_aware_re}.
+Some information extraction datasets already include this information \parencite{knowledgenet}; however, little work has been done in this direction yet.
+Qualifiers could be generated from representations of relations in a continuous manifold such as the one induced by a similarity space for few-shot evaluation.
+However, learning to map relation embeddings to qualifiers in an unsupervised fashion might prove difficult.
diff --git a/backmatter/datasets/appendix.tex b/backmatter/datasets/appendix.tex
@@ -0,0 +1,210 @@
+\chapter{Datasets}
+\label{chap:datasets}
+In this appendix, we present the primary datasets used throughout this thesis.
+Each section corresponds to a dataset or group of datasets.
+We focus on the peculiarities which make each dataset unique and provide some statistics relevant to our task.
+
+\section{\textsc{ace}}
+\label{sec:datasets:ace}
+Automatic content extraction (\textsc{ace}) is a \textsc{nist} program that developed several datasets for the evaluation of entity chunking and relation extraction.
+It is the spiritual successor of \textsc{muc} (Section~\ref{sec:datasets:muc}).
+In their nomenclature, the task of relation extraction is called relation detection and categorization (\textsc{rdc}).
+Datasets for relation extraction were released yearly between 2002 and 2005.%
+\sidenote{
+    The dataset from September~2002 is called \textsc{ace-2}.
+    This refers to the ``second phase'' of \textsc{ace}.
+    The pilot and first phase corpora only dealt with entity detection.
+}
+This makes comparison difficult; for example, in Chapter~\ref{chap:relation extraction}, we mention an \textsc{ace} dataset for several models (Sections~\ref{sec:relation extraction:hand-designed features}, \ref{sec:relation extraction:kernel}, \ref{sec:relation extraction:label propagation} and~\ref{sec:relation extraction:epgnn}); however, the versions of the datasets differ.
+
+A peculiarity of the \textsc{ace} dataset is its hierarchy of relations.
+For example, the \textsc{ace-2003} dataset contains a \textsl{social} relation type, which is divided into several relation subtypes such as \textsl{grandparent} and \textsl{sibling}.
+Results can be reported either on the relation types or subtypes, usually using an \fone{} measure or a custom metric designed by \textsc{ace} \parencitex{ace_evaluation} to handle directionality and the ``\textsl{other}'' relation (Section~\ref{sec:relation extraction:other}).
+
+\section{FewRel}
+\label{sec:datasets:fewrel}
+FewRel \parencitex{fewrel} is a few-shot relation extraction dataset.
+Given a query and several candidates, the model must decide which candidate conveys the relation closest to the one conveyed by the query.
+Therefore, FewRel is used to evaluate continuous relation representations; it is not typically used to evaluate a clustering model.
+For details on the few-shot setup, refer to Section~\ref{sec:relation extraction:few-shot}.
+
+The dataset was first constructed by aligning Wikipedia with Wikidata (Section~\ref{sec:datasets:wikidata}) using distant supervision (Section~\ref{sec:relation extraction:distant supervision}).
+Human annotators then hand-labeled the samples.
+The resulting dataset is perfectly balanced; all relations are represented by precisely 700 samples.
+The set of the 100 most common relations with good inter-annotator agreement was then divided into three splits, whose sizes are given in Table~\ref{tab:datasets:fewrel}.
+Since common relations were strongly undersampled to obtain a balanced dataset, entities do not repeat much.
+The attributed multigraph (Section~\ref{sec:graph:encoding}) corresponding to the train split of FewRel is composed of several connected components.
+The largest one covers approximately 21\% of the vertices, while more than half of all vertices are in connected components of size three or less.
+
+\begin{margintable}
+    \centering
+    \input{backmatter/datasets/fewrel.tex}
+    \scaption[Statistics of the FewRel dataset.]{
+        Statistics of the FewRel dataset.
+        The test relations and samples are not publicly available.
+        \label{tab:datasets:fewrel}
+    }
+\end{margintable}
+
+FewRel can be used for \(n\) way \(k\) shot evaluation, where usually \(n\in\{5,10\}\) and \(k\in\{1,5\}\).
+For reference, \textcite{fewrel} provides human performance on 5 way 1 shot (92.22\% accuracy) and 10 way 1 shot (85.88\% accuracy).
+
+A subsequent dataset released by the same team, FewRel~2.0 \parencitex{fewrel2}, revisited the task by adding two variations:
+\begin{description}[nosep]
+    \item[Domain adaptation,] the training set of the original FewRel is used (Wikipedia--Wikidata), but the model is evaluated on biomedical literature (PubMed--\textsc{umls}) containing relations such as \textsl{may treat} and \textsl{manifestation of}.
+    \item[Detecting \textsl{other} relation,] also called none-of-the-above, when the relation conveyed by the query does not appear in the candidates.
+\end{description}
+While domain adaptation is an interesting problem, for unsupervised approaches, the detection of \textsl{other} seems to defeat the point of modeling a similarity space instead of clustering relations.
+Furthermore, we only use FewRel as an evaluation tool and never train on it; therefore, using this second dataset made little sense.
+
+\section{Freebase}
+\label{sec:datasets:freebase}
+Freebase \parencitex{freebase} is a knowledge base (Section~\ref{sec:context:knowledge base}) started in~2007 and discontinued in~2016.
+\begin{margintable}
+    \centering
+    \input{backmatter/datasets/freebase.tex}
+    \scaption[Statistics of the Freebase knowledge base.]{
+        Statistics of the Freebase knowledge base at the time of its termination.
+        Most relations (around 81\%) appear only once in the knowledge base.
+        \label{tab:datasets:freebase}
+    }
+\end{margintable}
+As one of the first widely available knowledge bases containing general knowledge, Freebase was widely used for weak supervision.
+In particular, it is the knowledge base used in the original distant supervision article \parencite{distant}.
+Freebase was a collaborative knowledge base; as such, its content evolved throughout its existence.
+Therefore, even though \textcite{distant}, \textcite{rellda} and \textcite{vae_re} all run experiments on Freebase, their results are not comparable since they use different versions of the dataset.
+Data dumps are still provided by \textcite{freebase_data}; however, most of the facts were transferred to the Wikidata knowledge base (Section~\ref{sec:datasets:wikidata}).
+Some statistics about the latest version of Freebase are provided in Table~\ref{tab:datasets:freebase}.
+However, note that most relations in Freebase are scarcely used; only 6\,760 relations appear in more than 100 facts.
+Furthermore, the concept of entity is quite broad in Freebase; in particular, it makes use of a concept called mediator \parencite{freebase_processing}:
+\begin{indentedexample}
+    \texttt{/m/02mjmr} \textsl{/topic/notable\_for} \textcolor{Dark2-B}{\texttt{/g/125920}}\\
+    \textcolor{Dark2-B}{\texttt{/g/125920}} \textsl{/c…/notable\_for/object} \texttt{/gov…/us\_president}\\
+    \textcolor{Dark2-B}{\texttt{/g/125920}} \textsl{/c…/notable\_for/predicate} \texttt{/type/object/type}
+\end{indentedexample}
+Here \texttt{/m/02mjmr} refers to ``Barack Obama,'' while \texttt{/g/125920} is the mediator entity which is used to group together several statements about \texttt{/m/02mjmr}.
+
+\section{\textsc{muc-7 tr}}
+\label{sec:datasets:muc}
+The message understanding conferences (\textsc{muc}) were organized by \textsc{darpa} in the 1980s and 1990s.
+The seventh---and last---conference \parencitex{muc7} introduced a relation extraction task called ``template relation'' (\textsc{tr}).
+Three relations needed to be extracted: \textsl{employee of}, \textsl{location of} and \textsl{product of}.
+Both the train set and evaluation set contained 100 articles.
+The task was very much still in the ``template filling'' mindset, as can be seen in the following example of an extracted fact:
+\begin{indentedexample}
+    \texttt{<\textsc{employee\_of}-9602040136-5> :=}\\
+    \null\qquad\texttt{\textsc{person}: <\textsc{entity}-9602040136-11>}\\
+    \null\qquad\texttt{\textsc{organization}: <\textsc{entity}-9602040136-1>}
+
+    \medskip
+
+    \texttt{<\textsc{entity}-9602040136-11> :=}\\
+    \null\qquad\texttt{\textsc{ent\_name}: "Dennis Gillespie"}\\
+    \null\qquad\texttt{\textsc{ent\_type}: \textsc{person}}\\
+    \null\qquad\texttt{\textsc{ent\_descriptor}: "Capt."}\\
+    \null\qquad\texttt{/ "the commander of Carrier Air Wing 11"}\\
+    \null\qquad\texttt{\textsc{ent\_category}: \textsc{per\_mil}}
+
+    \medskip
+
+    \texttt{<\textsc{entity}-9602040136-1> :=}\\
+    \texttt{\textsc{ent\_name}: "\textsc{navy}"}\\
+    \texttt{\textsc{ent\_type}: \textsc{organization}}\\
+    \texttt{\textsc{ent\_category}: \textsc{org\_govt}}
+\end{indentedexample}
+
+\section{New York Times}
+\label{sec:datasets:nyt}
+The New York Times Annotated Corpus (\textsc{nyt}, \citex{nyt}) was widely used for relation extraction.
+The full dataset contains 1.8 million articles from 1987 to 2007; however, smaller---and sadly, different---subsets are in use.
+The subset we use in Chapter~\ref{chap:fitb} was first extracted by \textcitex{vae_re} and is supposed to be similar---but not identical---to the one of \textcite{rellda}.
+This \textsc{nyt} subset only contains articles from 2000 to 2007 from which ``noisy documents'' were filtered out.
+Semi-structured information such as tables and lists was also removed.
+The version of the dataset we received from Diego Marcheggiani was already preprocessed, with the features listed in Section~\ref{sec:fitb:baselines} already extracted.
+
+The original dataset can be obtained from the following website:
+\begin{center}
+\url{https://catalog.ldc.upenn.edu/LDC2008T19}
+\end{center}
+At the time of writing, once the license fee is paid, the only way to obtain the subset of \textcite{vae_re} and Chapter~\ref{chap:fitb} is through someone with access to this specific subset.
+This burdensome---and expensive---procedure is one of the reasons we introduced \textsc{t-re}x-based alternatives in Chapter~\ref{chap:fitb}.
+
+\section{SemEval 2010 Task 8}
+\label{sec:datasets:semeval}
+SemEval is the international workshop on semantic evaluation, which was started in~1998 (then called Senseval) with the goal of emulating the message understanding conferences (Section~\ref{sec:datasets:muc}).
+In~2010, eighteen different tasks were evaluated.
+Task number~8 was relation extraction.
+SemEval~2010 Task~8 \parencitex{semeval2010task8} therefore refers to the dataset provided at the time of this challenge.
+It is a supervised relation extraction dataset without entity linking and with non-unique entity reference (Section~\ref{sec:relation extraction:entity}).
+Its statistics are listed in Table~\ref{tab:datasets:semeval}.
+\begin{margintable}
+    \input{backmatter/datasets/semeval.tex}
+    \scaption[Statistics of the SemEval~2010 Task~8 dataset.]{
+        Statistics of the Sem\-Eval~2010 Task~8 dataset.
+        \label{tab:datasets:semeval}
+    }
+\end{margintable}%
+All samples were hand-labeled by human annotators with one of 19 relations.
+These 19 relations are built from 9 base relations, which can appear in both directions (Section~\ref{sec:relation extraction:directionality}), plus the \textsl{other} relation (Section~\ref{sec:relation extraction:other}).
+The 9 base relations in the dataset are:
+\begin{itemize}[nosep]
+ \item \textsl{cause--effect}
+ \item \textsl{instrument--agency}
+ \item \textsl{product--producer}
+ \item \textsl{content--container}
+ \item \textsl{entity--origin}
+ \item \textsl{entity--destination}
+ \item \textsl{component--whole}
+ \item \textsl{member--collection}
+ \item \textsl{message--topic}
+\end{itemize}
+SemEval~2010 Task~8 introduced an extensive evaluation system, most of which is described in Section~\ref{sec:relation extraction:supervised evaluation}.
+In particular, the official score of the competition was the half-directed macro-\(\overHalfdirected{\fone}\) (described in Section~\ref{sec:relation extraction:supervised evaluation}), which was referred to as ``\(9+1\)-way evaluation taking directionality into account.''
+
+\section{\textsc{t-re}x}
+\label{sec:datasets:trex}
+\textsc{t-re}x \parencitex{trex} is an alignment of Wikipedia with Wikidata.
+In particular, \textsc{t-re}x uses \textsc{db}pedia abstracts \parencite{dbpedia_abstracts}, that is, the introductory paragraphs of Wikipedia's articles.
+Its statistics are listed in Table~\ref{tab:datasets:trex}.
+
+\begin{margintable}
+ \centering
+ \input{backmatter/datasets/trex.tex}
+ \scaption[Statistics of the \textsc{t-re}x dataset.]{
+  Statistics of the \textsc{t-re}x dataset.
+  \label{tab:datasets:trex}
+ }
+\end{margintable}
+
+In the final dataset, entities are linked using the \textsc{db}pedia spotlight entity linker \parencite{spotlight}.
+Furthermore, indirect entity links are extracted using coreference resolution and a ``NoSub Aligner,'' which assumes that the title of the article is implicitly mentioned by all sentences.
+Finally, some sequences of words are also linked to relations using exact matches of Wikidata relation names.
+Both datasets used in Chapters~\ref{chap:fitb} and~\ref{chap:graph} only consider entities extracted by the spotlight entity linker (tagged \texttt{Wikidata\_Spotlight\_Entity\_Linker}).
+The two datasets of Chapter~\ref{chap:fitb} were filtered based on the tag of the predicate.
+\textsc{spo} only contains samples whose predicate's surface form appears in the sentence (tagged \texttt{Wikidata\_Property\_Linker}), while \textsc{ds} contains all samples with the two entities occurring in the same sentence (in other words, all samples except those tagged \texttt{NoSubject-Triple-aligner}).
+
+
+\section{Wikidata}
+\label{sec:datasets:wikidata}
+Wikidata \parencitex{wikidata} is a knowledge base (Section~\ref{sec:context:knowledge base}) started in~2012.
+Like the other projects of the Wikimedia Foundation, it is a collaborative enterprise; anyone can contribute new facts and entities.
+New relations are introduced through the consensus of long-term contributors to avoid the explosion of relation types observed on Freebase (Section~\ref{sec:datasets:freebase}).
+
+\begin{figure}
+ \centering
+ \input{backmatter/datasets/wikidata.tex}
+ \scaption[Structure of a Wikidata page.]{
+  Structure of a Wikidata page.
+  Facts related to two relations are shown (``statement groups'' in Wikidata parlance).
+  This page can be translated into three \(\entitySet^2\times\relationSet\) facts; the first has four additional qualifiers and the second has two additional qualifiers.
+ \label{fig:datasets:wikidata} + } +\end{figure} + +Contrary to the way knowledge bases are presented in Section~\ref{sec:context:knowledge base}, Wikidata is not structured as a set of \(\entitySet^2\times\relationSet\) triplets. +Instead, in Wikidata, all entities have a page that lists facts of which the entity is the subject. +These constitute our set \(\kbSet\subseteq\entitySet^2\times\relationSet\). +Furthermore, Wikidata facts can be qualified by additional \(\relationSet\times\entitySet\) pairs. +For example, Douglas Adams was \textsl{educated at} St John's College \underLine{\textsl{until} 1974}. +This structure is illustrated in Figure~\ref{fig:datasets:wikidata}. +To be more precise, Wikidata could be modeled as a set of qualified facts, where a qualified fact is an element of \(\entitySet^2\times\relationSet\times2^{\relationSet\times\entitySet}\). diff --git a/backmatter/datasets/fewrel.tex b/backmatter/datasets/fewrel.tex @@ -0,0 +1,9 @@ +\begin{tabular}{l r r} + \toprule + Split & Relations & Samples \\ + \midrule + Train & 64 & 44\,800 \\ + Valid & 16 & 11\,200 \\ + Test & 20 & 14\,000 \\ + \bottomrule +\end{tabular} diff --git a/backmatter/datasets/freebase.tex b/backmatter/datasets/freebase.tex @@ -0,0 +1,9 @@ +\begin{tabular}{l l} + \toprule + Object & Number \\ + \midrule + Facts & 3.1~billion \\ + Entities & 195~million \\ + Relations & 784\,977 \\ + \bottomrule +\end{tabular} diff --git a/backmatter/datasets/semeval.tex b/backmatter/datasets/semeval.tex @@ -0,0 +1,9 @@ +\begin{tabular}{l r} + \toprule + Object & Number \\ + \midrule + Train samples & 8\,000\\ + Test samples & 2\,717\\ + Relations & \(2\times 9+1 = 19\) \\ + \bottomrule +\end{tabular} diff --git a/backmatter/datasets/trex.tex b/backmatter/datasets/trex.tex @@ -0,0 +1,10 @@ +\begin{tabular}{l r} + \toprule + Object & Number \\ + \midrule + Articles & 3 million \\ + Sentences & 6.2 million \\ + Facts & 11 million \\ + Relations & 642 \\ + \bottomrule +\end{tabular} diff --git a/backmatter/datasets/wikidata.tex b/backmatter/datasets/wikidata.tex @@ -0,0 +1,41 @@ +\begin{tikzpicture}[braced/.style={decorate, decoration={brace, amplitude=5}}] + \node[anchor=south west] (title) at (0, 0) {\Large\bf Douglas Adams (\wdent{42})}; + \node[below=2mm of title.south west, right] (description) {English writer and humorist}; + \node[below=1mm of description.south west, right] (alias) {Douglas Noël Adams | Douglas Noel Adams}; + \node[below=3mm of alias.south west, right] (statementstitle) {\quad\large\bf Statements}; + \node[below=3mm of statementstitle.south west, right] (educated) {\quad \textsl{educated at} (\wdrel{69})}; + \node[below=1mm of educated.south west, right, xshift=10mm] (stjohn) {\(\bullet\) St John's College (\wdent{691283})}; + \node[inner sep=0, below=1mm of stjohn.south west, right, xshift=10mm] (start) {\textsl{start time} (\wdrel{580}) 1971}; + \node[inner sep=0, below=1.8mm of start.south west, right] (end) {\textsl{end time} (\wdrel{582}) 1974}; + \node[inner sep=0, below=1.8mm of end.south west, right] (major) {\textsl{academic major} (\wdrel{812}) English literature (\wdent{186579})}; + \node[inner sep=0, below=1.8mm of major.south west, right] (degree) {\textsl{academic degree} (\wdrel{512}) Bachelor of Arts (\wdent{1765120})\ \null}; + \node[below=2.5mm of degree.south west, right, xshift=-10mm] (brentwood) {\(\bullet\) Brentwood School (\wdent{4961791})}; + \node[inner sep=0, below=1mm of brentwood.south west, right, xshift=10mm] (start2) {\textsl{start time} (\wdrel{580}) 1959}; + 
\node[inner sep=0, below=1.8mm of start2.south west, right] (end2) {\textsl{end time} (\wdrel{582}) 1970}; + \node[below=5mm of end2.south west, right, xshift=-20mm] (work) {\quad\textsl{work location} (\wdrel{937})}; + \node[below=1mm of work.south west, right, xshift=10mm] (london) {\(\bullet\) London (\wdent{84})}; + \node[below=3mm of london.south west, right, xshift=-10mm] (dots) {\quad\dots}; + + \node[right=5mm of brentwood] (object2) {\textcolor{Dark2-A}{object (``\(e_2\)'')}}; + \node[anchor=west] (subject) at (object2.west|-title) {\textcolor{Dark2-A}{subject (``\(e_1\)'')}}; + \node[anchor=west] (relation1) at (object2.west|-educated) {\textcolor{Dark2-A}{relation (``\(r\)'')}}; + \node[anchor=west] (object1) at (object2.west|-stjohn) {\textcolor{Dark2-A}{object (``\(e_2\)'')}}; + \node[anchor=west] (relation2) at (object2.west|-work) {\textcolor{Dark2-A}{relation (``\(r\)'')}}; + \node[anchor=west] (object3) at (object2.west|-london) {\textcolor{Dark2-A}{object (``\(e_2\)'')}}; + \draw[Dark2-A] (title) -- (subject); + \draw[Dark2-A] (educated) -- (relation1); + \draw[Dark2-A] (stjohn) -- (object1); + \draw[Dark2-A] (brentwood) -- (object2); + \draw[Dark2-A] (work) -- (relation2); + \draw[Dark2-A] (london) -- (object3); + + \draw[braced, Dark2-B] (degree.south west) -- (start.north west) node[midway, left, xshift=-2mm] {\textcolor{Dark2-B}{qualifiers}}; + \draw[braced, Dark2-B] (end2.south west) -- (start2.north west) node[midway, left, xshift=-2mm] (qualifiers) {\textcolor{Dark2-B}{qualifiers}}; + + \node[draw, inner sep=3mm, fit=(title) (degree) (dots)] {}; + + \begin{pgfonlayer}{background} + \node[fill=black!10, inner sep=0mm, fit=(educated) (degree) (end2) (qualifiers)] {}; + \node[fill=black!10, inner sep=0mm, fit=(work) (work-|degree.east) (london)] {}; + \end{pgfonlayer} +\end{tikzpicture} diff --git a/backmatter/french/abstract.tex b/backmatter/french/abstract.tex @@ -0,0 +1,12 @@ +{\mdseries\garamond\textsc{\ifthesissummary Résumé\else Meta-résumé\fi}}\quad +Détecter les relations exprimées dans un texte est un problème fondamental de la compréhension du langage naturel. +Il constitue un pont entre deux approches historiquement distinctes de l'intelligence artificielle, celles à base de représentations symboliques et distribuées. +Cependant, aborder ce problème sans supervision humaine pose plusieurs problèmes et les modèles non supervisés ont des difficultés à faire écho aux avancées des modèles supervisés. +Cette thèse aborde deux lacunes des approches non supervisées~: le problème de la régularisation des modèles discriminatifs et le problème d'exploitation des informations relationnelles à partir des structures des jeux de données. +La première lacune découle de l'utilisation de réseaux neuronaux profonds. +Ces modèles ont tendance à s'effondrer sans supervision. +Pour éviter ce problème, nous introduisons deux fonctions de coût sur la distribution des relations pour contraindre le classifieur dans un état entraînable. +La deuxième lacune découle du développement des approches au niveau des jeux de données. +Nous montrons que les modèles non supervisés peuvent tirer parti d'informations issues de la structure des jeux de données, de manière encore plus décisive que les modèles supervisés. +Nous exploitons ces structures en adaptant les méthodes non supervisées existantes pour capturer les informations topologiques à l'aide de réseaux convolutifs pour graphes. 
+De plus, nous montrons que nous pouvons exploiter l'information mutuelle entre les données topologiques et linguistiques pour concevoir un nouveau paradigme d'entraînement pour l'extraction non supervisée de relations. diff --git a/backmatter/french/appendix.tex b/backmatter/french/appendix.tex @@ -0,0 +1,28 @@ +\ifthesissummary% + \input{backmatter/french/title.tex}% +\else% + \chapter{\foreignlanguage{french}{Résumé en français}}% + \label{chap:french}% +\fi% +\begin{otherlanguage}{french} +\begin{epigraph} + {Gustave Flaubert} + {\frquote{\citefield{age_du_capitaine}{title}}} + {\cite*{age_du_capitaine}}[Flaubert se moque de l'enseignement mathématique à \frquote{son vieux rat} (Caroline Flaubert). Celle-ci ne répondit pas en prenant en compte la corrélation entre la responsabilité de diriger un navire jaugeant 200 tonneaux et l'avancée de la carrière du capitaine.] + Puisque tu fais de la géométrie et de la trigonométrie, je vais te donner un problème : Un navire est en mer, il est parti de Boston chargé de coton, il jauge 200 tonneaux ; il fait voile vers le Havre, le grand mât est cassé, il y a un mousse sur le gaillard d’avant, les passagers sont au nombre de douze, le vent souffle \textsc{n}.-\textsc{e}.-\textsc{e}., l’horloge marque 3 heures un quart d’après-midi, on est au mois de mai… On demande l’âge du capitaine ? +\end{epigraph}% +\begin{epigraph} + {Alfred Jarry} + {\citetitle{faustroll}} + {\cite*{faustroll}} + À travers l’espace feuilleté des vingt-sept pairs, Faustroll évoqua vers la troisième dimension :\\ + De Baudelaire, le Silence d’Edgard Poë, en ayant soin de retraduire en grec la traduction de Baudelaire. +\end{epigraph}% + +\input{backmatter/french/abstract.tex} +\input{backmatter/french/introduction.tex} +\input{backmatter/french/context.tex} +\input{backmatter/french/fitb.tex} +\input{backmatter/french/graph.tex} +\input{backmatter/french/conclusion.tex} +\end{otherlanguage} diff --git a/backmatter/french/conclusion.tex b/backmatter/french/conclusion.tex @@ -0,0 +1,11 @@ +\section{Conclusion} +Pendant ma candidature au doctorat, je me suis---principalement% +\sidenote{ + Avec la distraction occasionnelle---et profondément appréciée---de Syrielle Montariol sur d'autres projets de \textsc{tal} \parencite{mmsrl}. +}% +---con\-centré sur l'étude de l'extraction non supervisée de relations. +Dans cette tâche, étant donné un ensemble de phrases et de paires d'entités, nous recherchons l'ensemble des faits véhiculés \((e_1, r, e_2)\), tels que \(r\) exprime la relation entre \(e_1\) et \(e_2\) dans un échantillon. +Pour mener à bien cette tâche, nous avons suivi deux axes de recherche principaux : premièrement, la question de savoir comment entraîner un réseau neuronal profond pour l'extraction non supervisée de relations ; deuxièmement, la question de savoir comment tirer parti de la structure d'un ensemble de données pour obtenir des informations supplémentaires pour la tâche d'extraction de relations sans supervision. + +Plus grossièrement, nous avons d'abord aidé au développement d'approches d'apprentissage profond pour la tâche d'extraction non supervisée de relations, puis contribué à ouvrir une nouvelle direction de recherche sur les approches au niveau des jeux de données dans la configuration non supervisée utilisant des modèles basés sur des graphes. +Ces deux objets de recherche étaient en quelque sorte des développements naturels suivant les tendances actuelles de la recherche en apprentissage automatique. 
diff --git a/backmatter/french/context.tex b/backmatter/french/context.tex
@@ -0,0 +1,177 @@
+\section{Contexte}
+\label{sec:french:context}
+L'extraction de relations peut nous aider à mieux comprendre le fonctionnement des langues.
+Par exemple, la question de savoir s'il est possible d'apprendre une langue à partir d'une petite quantité de données reste ouverte en linguistique.
+L'argument de la pauvreté du stimulus affirme que les enfants ne devraient pas être capables d'acquérir des compétences linguistiques en étant exposés à si peu de données.%
+\sidenote{Ce qui impliquerait qu'une partie de la maîtrise du langage est innée.}
+Il s'agit de l'un des principaux arguments en faveur de la théorie controversée de la grammaire universelle.
+Capturer des relations à partir de rien d'autre qu'un petit nombre d'expressions en langue naturelle serait un premier pas vers la réfutation de l'argument de la pauvreté du stimulus.
+
+Ce type de motivation derrière le problème d'extraction de relations cherche à avancer l'\emph{épistémè}.%
+\sidenote{Du grec ancien \foreignlanguage{greek}{ἐπιστήμη} : connaissance, savoir.}
+Cependant, la plupart des avancées sur cette tâche découlent d'une recherche de \emph{technè}.%
+\sidenote{Du grec ancien \foreignlanguage{greek}{τέχνη} : technique, art.}
+L'objectif final est de construire un système ayant des applications dans le monde réel.
+Dans cette perspective, l'intelligence artificielle a pour but de remplacer ou d'assister les humains dans des tâches spécifiques.
+La plupart des tâches nécessitent une certaine forme de connaissances techniques (par exemple, le diagnostic médical nécessite la connaissance des relations entre symptômes et maladies).
+Le principal vecteur de connaissances est le langage (par exemple, à travers l'éducation).
+Ainsi, l'acquisition de connaissances à partir d'énoncés en langue naturelle est un problème fondamental pour les systèmes destinés à avoir des applications concrètes.
+
+\Textcitex{assisted_curation} présentent une analyse de l'impact des systèmes d'extraction de connaissances à partir de textes sur un problème concret.
+Leur article montre que les annotateurs humains peuvent utiliser un système d'apprentissage automatique pour mieux extraire un ensemble d'interactions protéine--protéine de la littérature biomédicale.
+Il s'agit clairement d'une recherche de \emph{technè} : les interactions protéine--protéine ne sont pas de nouvelles connaissances, elles sont déjà publiées ; cependant, le système améliore le travail de l'opérateur humain.
+
+Cet exemple d'application est révélateur du problème plus vaste de l'explosion informationnelle.
+La quantité d'informations publiées n'a cessé de croître au cours des dernières décennies.
+L'apprentissage automatique peut être utilisé pour filtrer ou agréger cette grande quantité de données.
+Pour ce genre de tâches, l'objet d'intérêt n'est pas le texte en lui-même mais la sémantique véhiculée, sa signification.
+Une question se pose alors : comment définir la sémantique que l'on cherche à traiter ?
+En effet, la définition du concept de \frquote{sens} fait l'objet de nombreuses discussions dans la communauté philosophique.
+Bien que certains sceptiques, comme Quine, ne reconnaissent pas le sens comme un concept essentiel, ils estiment qu'une description minimale du sens devrait au moins englober la reconnaissance de la synonymie.
+Cela fait suite à la discussion ci-dessus sur la reconnaissance des répétitions : si \input{frontmatter/gavagai 1.tex} est une répétition de \input{frontmatter/gavagai 2.tex}, nous devrions pouvoir dire que \input{frontmatter/gavagai 1.tex} et \input{frontmatter/gavagai 2.tex} sont synonymes. +En pratique, cela implique que nous devrions être en mesure d'extraire des classes de formes linguistiques ayant la même signification ou le même référent --- la différence entre les deux n'est pas pertinente pour notre problème. + +\begin{marginparagraph}[-5cm] + \includegraphics[width=\marginparwidth]{frontmatter/Paris Quadrifolia.jpg} + Paris (\wdent{162121}) n'est ni la capitale de la France, ni le prince de Troie, c'est le genre de la parisette à quatre feuilles. + La capitale de la France est Paris (\wdent{90}) et le prince de Troie, fils de Priam, Pâris (\wdent{167646}). + Illustration tirée de \textcite{paris_quadrifolia}. + \label{margin:french:paris quadrifolia} +\end{marginparagraph} +\begin{epigraph} + {Willard Van Orman Quine} + {\citetitle{quine_two_dogma}} + {\cite*{quine_two_dogma}} + [Traduction de \textsc{Laugier} (\cite*{quine_two_dogma_fr})][5cm] + La signification, c'est ce que devient l'essence, une fois divorcée d'avec l'objet de la référence et remariée au mot. +\end{epigraph} + +Bien que la discussion au sujet du sens soit essentielle pour définir la notion de relation qui nous intéresse, il est important de noter que nous travaillons sur la langue naturelle ; nous voulons extraire des relations à partir de textes, et non de répétitions d'entités abstraites. +Pourtant, la correspondance entre les signifiants linguistiques et leur signification n'est pas bijective. +Nous pouvons distinguer deux types de désalignement entre les deux : soit deux expressions renvoient au même objet (synonymie), soit la même expression renvoie à des objets différents selon le contexte dans lequel elle apparaît (homonymie). +La première variété de désalignement est la plus courante, surtout au niveau de la phrase. +Par exemple, \frquote{Paris est la capitale de la France} et \frquote{la capitale de la France est Paris} véhiculent le même sens malgré des formes écrites et orales différentes. +Au contraire, le second type est principalement visible au niveau des mots. +Par exemple, la préposition \frquote{de} dans les phrases \frquote{frémir de peur} et \frquote{Bellérophon de Corinthe} traduit soit une relation \textsl{causé par} soit une relation \textsl{né à}. +Pour distinguer ces deux utilisations de \frquote{de,} nous pouvons utiliser des identifiants de relation tels que \wdrel{828} pour \textsl{causé par} et \wdrel{19} pour \textsl{né à}. +Un exemple avec des identifiants d'entités --- qui ont pour but d'identifier de manière unique les concepts d'entité --- est donné dans la marge. + +Alors que la discussion qui précède donne l'impression que tous les objets s'inscrivent parfaitement dans des concepts clairement définis, en pratique, c'est loin d'être le cas. +Très tôt dans la littérature de la représentation des connaissances, \textcitex{is-a_analysis} a remarqué la difficulté de définir clairement des relations apparemment simples telles que \textsl{instance de} (\wdrel{31}). +Ce problème découle de l'hypothèse selon laquelle la synonymie est transitive et, par conséquent, induit des classes d'équivalence. 
+Cette hypothèse est assez naturelle puisqu'elle s'applique déjà au lien entre le langage et ses références : même si deux chats peuvent être très différents l'un de l'autre, nous les regroupons sous le même signifiant. +Cependant, la langue naturelle est flexible. +Lorsque nous essayons de capturer l'entité \frquote{chat,} il n'est pas tout à fait clair si nous incluons \frquote{un chat avec le corps d'une tarte aux cerises} dans les expériences ordinaires de chat.% +\sidenote[][-14mm]{Le lecteur qui décrirait une telle entité comme étant un chat est invité à remplacer diverses parties du corps de ce chat imaginaire par des aliments jusqu'à ce que cesse son expérience de \emph{félinité}.} +Pour contourner ce problème, certains travaux récents sur le problème d'extraction de relations \parencitex{fewrel} définissent la synonymie comme une association continue intransitive. +Au lieu de regrouper les formes linguistiques dans des classes bien définies partageant un sens unique, ils extraient une fonction de similarité mesurant la ressemblance de deux objets. + +\smallskip + +Maintenant que nous avons conceptualisé notre problème, concentrons-nous sur l'approche technique que nous proposons. +Tout d'abord, pour résumer, cette thèse se concentre sur l'extraction non supervisée de relations à partir de textes.% +\sidenote[][-6.4mm]{Nous utilisons le texte car il s'agit de l'expression la moins ambiguë et la plus facile à traiter de la langue.} +Les relations étant des objets capturant les interactions entre les entités, notre tâche est de trouver la relation reliant deux entités données dans un texte. +Par exemple, dans les trois exemples suivants où les entités sont soulignées : +\begin{marginparagraph} + Nous utilisons les identifiants Wikidata (\url{https://www.wikidata.org}) pour indexer les entités et les relations. + Les identifiants des entités commencent par \texttt{Q}, tandis que les identifiants des relations commencent par \texttt{P}. + Par exemple, \wdent{35120} est une entité. +\end{marginparagraph} +\begin{marginparagraph} + \includegraphics[width=\marginparwidth]{frontmatter/Ship of Theseus.jpg} + Ariane se réveille sur le rivage de Naxos où elle a été abandonnée, peinture murale d'Herculanum dans la collection du \textsc{British Museum} (\cite*{ship_of_theseus}). + Le navire au loin peut être identifié comme étant le bateau de Thésée, pour l'instant. + Selon le point de vue philosophique du lecteur (\wdent{1050837}), son identité en tant que bateau de Thésée pourrait ne pas perdurer. +\end{marginparagraph} +\begin{indentedexample} + \uhead{Megrez} est une étoile de la constellation circumpolaire nord de la \utail{Grande Ourse}. + + \smallskip + + \uhead{Posidonios} était un philosophe, astronome, historien, mathématicien et professeur grec originaire d'\utail{Apamée}. + + \smallskip + + \uhead{Hipparque} est né à \utail{Nicée}, et est probablement mort sur l'île de Rhodes, en Grèce. +\end{indentedexample} +nous souhaitons reconnaître que les deux dernières phrases véhiculent la même relation --- dans ce cas, \sfTripletHolds{e_1}{né à}{e_2} (\wdrel{19}) --- ou du moins, suivant la discussion du paragraphe précédent sur la difficulté de définir des classes de relations, nous voulons reconnaître que les relations exprimées par les deux derniers échantillons sont plus proches l'une de l'autre que celle exprimée par le premier échantillon. +Nous avançons que cela peut être réalisé par des algorithmes d'apprentissage automatique. 
+En particulier, nous étudions comment aborder cette tâche en utilisant l'apprentissage profond. +Bien que l'extraction de relations puisse être abordée comme un problème de classification supervisée standard, l'étiquetage d'un jeu de données avec des relations précises est une tâche fastidieuse, en particulier lorsque l'on traite des documents techniques tels que la littérature biomédicale étudiée par \textcite{assisted_curation}. +Un autre problème fréquemment rencontré par les annotateurs est la question de l'applicabilité d'une relation, par exemple, l'expression \frquote{le \utail{père} fondateur du \uhead{pays}} doit-elle être étiquetée avec la relation \textsl{produit--producteur} ?% +\sidenote{ + L'annotateur de ce morceau de phrase dans le jeu de données SemEval~2010 Task~8 a considéré qu'il exprimait effectivement la relation \textsl{produit--producteur}. + La difficulté d'appliquer précisément une définition est un argument supplémentaire en faveur des approches basées sur les fonctions de similarité par rapport aux approches de classification. +} +Nous examinons maintenant comment l'apprentissage profond est devenu la technique la plus prometteuse pour s'attaquer aux problèmes de traitement de la langue naturelle. + +La matière première du problème d'extraction de relations est le langage. +Le traitement automatique de la langue naturelle (\textsc{tal})% +\sidenote{\emph{natural language processing} (\textsc{nlp})} +était déjà une direction de recherche importante dans les premières années de l'intelligence artificielle. +On peut le voir du point de vue \emph{épistémè} dans l'article fondateur de \textcitex{turing_test}. +Cet article propose la maîtrise du langage comme preuve d'intelligence, dans ce qui est maintenant connu sous le nom de test de Turing. +La langue était également un sujet d'intérêt pour des objectifs de \emph{technè}. +En janvier 1954, l'expérience de Georgetown--\textsc{ibm} tente de démontrer la possibilité de traduire le russe en anglais à l'aide d'ordinateurs \parencite{georgetown-ibm}. +L'expérience proposait de traduire soixante phrases en utilisant un dictionnaire bilingue pour traduire individuellement les mots et six types de règles grammaticales pour les réorganiser. +Les premières expériences ont suscité beaucoup d'attentes, qui ont été suivies d'une inévitable déception, entraînant un \frquote{hiver} durant lequel les fonds attribués à la recherche en intelligence artificielle ont été restreints. +Si la traduction mot à mot est assez facile dans la plupart des cas, la traduction de phrases entières est beaucoup plus difficile. +La mise à l'échelle de l'ensemble des règles grammaticales dans l'expérience de Georgetown--\textsc{ibm} s'est avérée impraticable. +Cette limitation n'était pas d'ordre technique. +Avec l'amélioration des systèmes de calcul, davantage de règles auraient pu facilement être codées. +L'un des problèmes identifiés à l'époque était celui de la compréhension du sens commun.% +\sidenote[][-42.5mm]{\emph{commonsense knowledge}} +Pour traduire ou, plus généralement, traiter une phrase, il faut la comprendre dans le contexte du monde dans lequel elle a été prononcée. +De simples règles de réécriture ne peuvent pas rendre compte de ce processus.% +\sidenote[][-5cm]{ + Par ailleurs, la grammaire est encore un domaine de recherche actif. + Nous ne comprenons pas parfaitement la réalité sous-jacente capturée par la plupart des mots et sommes donc incapables d'écrire des règles formelles complètes pour leurs usages. 
+ Par exemple, \textcite{prepositions_francais} présente un article de linguistique traitant de l'utilisation des prépositions françaises \frquote{de} et \frquote{à.}
+ C'est l'un des arguments en faveur des approches non supervisées ; en évitant d'étiqueter manuellement les jeux de données, nous évitons la limite des connaissances des annotateurs humains.
+}
+Pour pouvoir traiter des phrases entières, un changement de paradigme était nécessaire.
+
+Une première évolution a eu lieu dans les années 1990 avec l'avènement des approches statistiques \parencite{statistical_methods}.
+Ce changement peut être attribué en partie à l'augmentation de la puissance de calcul, mais aussi à l'abandon progressif de préceptes linguistiques essentialistes au profit de préceptes distributionnalistes.%
+\sidenote{
+ Noam Chomsky, l'un des linguistes essentialistes les plus importants, considère que la manipulation de probabilités d'extraits de texte ne permet pas d'acquérir une meilleure compréhension du langage.
+ Suite au succès des approches statistiques, il n'a reconnu qu'un accomplissement de \emph{technè} et non d'\emph{épistémè}.
+ Pour une réponse à cette position, voir \textcite{statistical_methods, norvig_chomsky}.
+}
+Au lieu de s'appuyer sur des experts humains pour concevoir un ensemble de règles, les approches statistiques exploitent les répétitions dans de grands corpus de textes pour déduire ces règles automatiquement.
+Par conséquent, cette progression peut également être considérée comme une transformation des modèles d'intelligence artificielle symbolique vers des modèles statistiques.
+La tâche d'extraction de relations a été formalisée à cette époque.
+Et si les premières approches étaient basées sur des modèles symboliques utilisant des règles prédéfinies, les méthodes statistiques sont rapidement devenues la norme après les années 1990.
+Cependant, ces modèles statistiques reposaient toujours sur des connaissances linguistiques.
+\tatefix{3mm}{6mm}{6mm}%
+\begin{cjkepigraph}[\traditionalChinese]{45mm}
+ {\begin{epigraphcontent}[35mm]
+  {}
+  {``Gongsun Longzi'' Chapitre~2}
+  {circa~300~\textsc{av.~n.~è.}}
+  Cheval blanc n'est pas cheval.
+ \end{epigraphcontent}}
+ [%
+ Un paradoxe bien connu de la philosophie chinoise illustrant la difficulté de définir clairement le sens véhiculé par la langue naturelle.
+ Ce paradoxe peut être résolu en désambiguïsant le mot \frquote{cheval.}
+ Fait-il référence à \frquote{l'ensemble de tous les chevaux} (la vision méréologique) ou à \frquote{la chevalité} (la vision platonicienne) ?
+ L'interprétation méréologique a été célèbrement --- et de manière controversée --- introduite par \textcite{hansen_mass_noun_hypothesis} ; voir \textcite{chinese_ontology} pour une discussion des premières vues ontologiques du langage en Chine.
+ ]
+ 白馬非馬
+\end{cjkepigraph}%
+Les systèmes d'extraction de relations étaient généralement divisés en une première phase d'extraction de caractéristiques linguistiques spécifiées à la main et une seconde phase où une relation était prédite à partir de ces caractéristiques à l'aide de modèles statistiques peu profonds.
+
+Une deuxième évolution est survenue dans les années 2010 lorsque les approches d'apprentissage profond ont effacé la séparation entre les phases d'extraction de caractéristiques et de prédiction.
+Les modèles d'apprentissage profond sont entrainés pour traiter directement les données brutes, dans notre cas des extraits de texte.
+À cette fin, des réseaux de neurones capables d'approcher n'importe quelle fonction sont utilisés.
+Cependant, l'entraînement de ces modèles nécessite généralement de grandes quantités de données étiquetées.
+Il s'agit d'un problème particulièrement important pour nous puisque nous traitons une tâche non supervisée.
+En tant que technique la plus récente et la plus efficace, l'apprentissage profond est un choix naturel pour s'attaquer à l'extraction de relations.
+Cependant, ce choix s'accompagne de problématiques que nous essayons de résoudre dans ce manuscrit.
+
+\begin{marginparagraph}
+ {\centering\includegraphics[width=\marginparwidth]{frontmatter/OuCuiPo.jpg}\\}
+ Frontispice de la bibliothèque OuCuiPienne par \textcite{oucuipo}.
+ Une autre façon de cuisiner avec les lettres.
+\end{marginparagraph}
diff --git a/backmatter/french/fitb quantitative.tex b/backmatter/french/fitb quantitative.tex
@@ -0,0 +1,12 @@
+\begin{tabular}{c c r}
+ \toprule
+ \multicolumn{2}{c}{Modèle} & \multirow{2}{*}{\bcubed\fone} \\
+ \cmidrule(lr){1-2}
+ Classif. & Reg. & \\
+ \midrule
+ Linear & \loss{vae reg} & 35,2 \\
+ \textsc{pcnn} & \loss{vae reg} & 27,6 \\
+ Linear & \(\loss{s}+\loss{d}\) & 37,5 \\
+ \textsc{pcnn} & \(\loss{s}+\loss{d}\) & \strong{39,4} \\
+ \bottomrule
+\end{tabular}%
diff --git a/backmatter/french/fitb.tex b/backmatter/french/fitb.tex
@@ -0,0 +1,65 @@
+\section{Régularisation des modèles discriminatifs d'extraction non supervisée de relations}
+\label{sec:french:fitb}
+L'évolution des méthodes d'extraction de relations non supervisées suit de près celle des méthodes de \textsc{tal} décrite ci-dessus.
+La première approche utilisant des techniques d'apprentissage profond a été celle de \textcite{vae_re}.
+Cependant, une partie de leur modèle reposait toujours sur des caractéristiques linguistiques extraites en amont.
+La raison pour laquelle cette extraction ne pouvait pas être faite automatiquement, comme c'est habituellement le cas en apprentissage profond, est étroitement liée à la nature non supervisée du problème.
+Notre première contribution est de proposer une technique permettant l'entraînement d'approches d'extraction non supervisée de relations par apprentissage profond.
+
+Nous avons identifié deux problèmes critiques des modèles discriminants existants qui entravent l'utilisation de réseaux neuronaux profonds pour l'extraction de caractéristiques.
+Ces problèmes concernent la sortie du classifieur,
+\begin{marginparagraph}
+ Cette section a fait l'objet d'une publication:\\
+ \hbadness=8000% :'(
+ Étienne Simon, Vincent Guigue, Benjamin Piwowarski. \citefield{fitb}[linkedtitle]{title} \citefield{fitb}{shortseries}~\cite*{fitb}
+\end{marginparagraph}
+qui a tendance à s'effondrer en une distribution triviale, soit déterministe, soit uniforme.
+Nous proposons d'introduire deux fonctions de coût sur la distribution des relations pour atténuer ces problèmes : une fonction d'asymétrie éloigne la prédiction d'une loi uniforme, et une distance de distributions empêche la sortie de s'effondrer vers une distribution déterministe.
+Cela nous a permis d'entraîner un modèle \textsc{pcnn} \parencitex{pcnn} pour regrouper, sans supervision, les échantillons en partitions%
+\sidenote{\emph{clusters}}
+véhiculant la même relation.
+
+\begin{marginfigure}
+ \centering
+ \input{mainmatter/fitb/problem 1.tex}
+ \scaption[Illustration du problème d'uniformité.]{
+  Illustration du problème d'uniformité.
+  Le classifieur attribue la même probabilité à toutes les relations.
+  À la place, nous souhaitons que le classifieur prédise clairement une relation unique pour chaque échantillon.
+  \label{fig:french:fitb problem}
+ }
+\end{marginfigure}
+\begin{margintable}
+ \centering
+ \input{backmatter/french/fitb quantitative.tex}
+ \scaption[Résultats quantitatifs des méthodes de partitionnement sur le jeu de données \textsc{nyt-fb}.]{
+  Résultats quantitatifs des méthodes de partitionnement sur le jeu de données \textsc{nyt-fb}.
+  On distingue le classifieur utilisé (Classif.) de la régularisation utilisée (Reg.).
+  La régularisation \loss{vae reg} est celle issue de l'article de \textcite{vae_re}.
+  \label{tab:french:fitb}
+ }
+\end{margintable}
+
+Notre approche se base sur le problème de remplissage de texte à trous:
+\begin{indentedexample}
+ ``Le \uhead{sol} a été la monnaie du \utail{~?~} entre 1863 et 1985.''
+\end{indentedexample}
+Pour pouvoir remplir cette phrase avec le mot manquant, il est nécessaire de comprendre la relation véhiculée.
+Nous utilisons cette tâche comme un substitut nous permettant d'identifier la sémantique relationnelle de la phrase.
+Étant donné une phrase \(s\) contenant deux entités \(\vctr{e}\) exprimant la relation \(r\), nous modélisons la probabilité suivante:
+\begin{equation*}
+ P(e_{-i} \mid s, e_i) =
+  \sum_{r\in\relationSet} \underbrace{P(r\mid s)}_{\text{(i)\,classifieur}} \underbrace{P(e_{-i} \mid r, e_i)}_{\text{(ii)\,prédicteur d'entité}} \qquad \text{pour } i=1, 2.
+\end{equation*}
+
+Nous utilisons un réseau profond (\textsc{pcnn}, \cite{pcnn}) pour le classifieur et le même modèle que \textcite{vae_re} pour la prédiction d'entité.
+Le modèle résultant présente des instabilités, comme celle illustrée par la Figure~\ref{fig:french:fitb problem}.
+Nous proposons deux fonctions de coût supplémentaires sur les paramètres \(\vctr{\phi}\) du classifieur pour résoudre ces problèmes:
+\begin{align*}
+ \loss{s}(\vctr{\phi}) & = \expectation_{(s, \vctr{e})\sim \uniformDistribution(\dataSet)} \left[ \entropy(\rndm{R} \mid s, \vctr{e}; \vctr{\phi}) \right] \\
+ \loss{d}(\vctr{\phi}) & = \kl(P(\rndm{R}\mid\vctr{\phi}) \mathrel{\|} \uniformDistribution(\relationSet)).
+\end{align*}
+
+La première fonction force la sortie du classifieur à avoir une entropie faible, ce qui résout le problème de la Figure~\ref{fig:french:fitb problem}.
+La seconde fonction s'assure qu'une variété de relations soient prédites pour différents échantillons.
+Ces deux fonctions nous permettent d'entrainer un réseau profond pour l'extraction non supervisée de relations, comme le montrent les scores de la Table~\ref{tab:french:fitb}.
+Une esquisse indicative du calcul de ces deux fonctions de coût est donnée ci-après.
diff --git a/backmatter/french/graph quantitative.tex b/backmatter/french/graph quantitative.tex
@@ -0,0 +1,9 @@
+\begin{tabular}{l r}
+\toprule
+ Modèle & Précision \\
+\midrule
+ Linguistique (\textsc{bert}) & 69,46 \\
+ Topologique (\(W_1\)) & 65,75 \\
+ Tous les deux & 72,18 \\
+\bottomrule
+\end{tabular}%
diff --git a/backmatter/french/graph.tex b/backmatter/french/graph.tex
@@ -0,0 +1,109 @@
+\section{Modélisation à l'aide de graphes de la structure des jeux de données}
+\label{sec:french:graph}
+Comme mentionné dans la Section~\ref{sec:french:context}, les approches récentes utilisent une définition plus souple des relations en extrayant une fonction de similarité au lieu d'un classifieur.
+De plus, elles considèrent un contexte plus large : au lieu de traiter chaque phrase individuellement, la cohérence globale des relations extraites est prise en compte.
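+
+À titre purement indicatif, les deux fonctions de coût \(\loss{s}\) et \(\loss{d}\) de la Section~\ref{sec:french:fitb} peuvent s'estimer sur un lot d'échantillons à partir des logits du classifieur, comme dans l'esquisse PyTorch ci-dessous (les noms y sont hypothétiques et la distribution \(P(\rndm{R})\) y est approchée par la moyenne des sorties du classifieur sur le lot)~:
+\begin{verbatim}
+import torch
+import torch.nn.functional as F
+
+def pertes_de_regularisation(logits):
+    # logits : tenseur (lot, nombre de relations) issu du classifieur.
+    log_p = F.log_softmax(logits, dim=-1)  # log P(r | s, e)
+    p = log_p.exp()
+    # L_s : entropie moyenne de la sortie du classifieur
+    # (faible => une relation clairement predite par echantillon).
+    perte_s = -(p * log_p).sum(dim=-1).mean()
+    # L_d : KL entre la distribution agregee sur le lot et la loi uniforme
+    # (faible => des relations variees sur l'ensemble du lot).
+    p_agregee = p.mean(dim=0)
+    log_uniforme = -torch.log(torch.tensor(float(p_agregee.numel())))
+    perte_d = (p_agregee * (torch.log(p_agregee + 1e-12) - log_uniforme)).sum()
+    return perte_s, perte_d
+\end{verbatim}
+En pratique, ces deux termes s'ajoutent, pondérés par des hyperparamètres, à la fonction de coût principale du modèle de remplissage de texte à trous.
+Revenons maintenant au contexte plus large offert par la structure des jeux de données.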
+Cependant, ce deuxième type d'approches a principalement été appliqué au cadre supervisé, avec une utilisation plus limitée dans le cadre non supervisé.
+Notre deuxième contribution concerne l'utilisation de ce contexte plus large pour l'extraction non supervisée de relations.
+En particulier, nous établissons des parallèles avec le test d'isomorphisme de Weisfeiler--Leman pour concevoir de nouvelles méthodes utilisant conjointement des caractéristiques topologiques (au niveau des jeux de données) et linguistiques (au niveau des phrases).
+
+Nous encodons le problème d'extraction de relations comme un problème d'étiquetage d'un multigraphe \(G=(\entitySet, \arcSet, \gfendpoints, \gfrelation, \gfsentence)\) défini comme suit:
+\begin{itemize}[nosep]
+ \item \(\entitySet\) est l'ensemble des nœuds, qui correspondent aux entités,
+ \item \(\arcSet\) est l'ensemble des arcs, qui connectent deux entités,
+ \item \(\gfsource: \arcSet\to \entitySet\) associe à chaque arc son nœud d'origine (l'entité marquée \(e_1\)),
+ \item \(\gftarget: \arcSet\to \entitySet\) associe à chaque arc son nœud de destination (l'entité marquée \(e_2\)),
+ \item \(\gfsentence: \arcSet\to \sentenceSet\) associe à chaque arc \(a\in \arcSet\) la phrase correspondante contenant \(\gfsource(a)\) et \(\gftarget(a)\),
+ \item \(\gfrelation: \arcSet\to \relationSet\) associe à chaque arc \(a\in\arcSet\) la relation entre les deux entités véhiculée par \(\gfsentence(a)\).
+\end{itemize}
+Étant donné un chemin dans ce graphe:
+\begin{center}
+ \input{mainmatter/graph/3-path.tex}
+\end{center}
+nous avons conçu un algorithme de comptage basé sur l'exponentiation de la matrice d'adjacence de \(G\) et sur un échantillonnage préférentiel%
+\sidenote{\emph{importance sampling}}
+qui nous permet d'approcher l'information mutuelle \(\operatorname{I}(\rndm{r}_2; \rndm{r}_1, \rndm{r}_3) \approx 6\mathord{,}95\ \text{bits}\).
+Elle se décompose en une entropie conditionnelle \(\entropy(\rndm{r}_2\mid\rndm{r}_1,\rndm{r}_3)\approx 1\mathord{,}06\ \text{bits}\) soustraite à l'entropie croisée%
+\sidenote{\emph{cross-entropy}}
+\(\expectation_{r_1, r_3}[\entropy_{P(\rndm{r}_2)}(\rndm{r}_2\mid r_1, r_3)]\approx 8\mathord{,}01\ \text{bits}\).
+Cela signifie que la majeure partie de l'information relationnelle est extractible à partir du voisinage dans le graphe \(G\).
+
+\begin{marginfigure}[-92mm]
+ \centering
+ \input{mainmatter/graph/isomorphism.tex}
+ \scaption[Exemple de graphes isomorphes.]{
+  Exemple de graphes isomorphes.
+  Chaque nœud \(i\) dans le graphe de gauche correspond à la \(i\)-ième lettre de l'alphabet dans le graphe de droite.
+  Par ailleurs, ces graphes contiennent des automorphismes non triviaux, par exemple en associant le nœud \(i\) au nœud \(9-i\).
+  \label{fig:french:isomorphism}
+ }
+\end{marginfigure}
+
+Forts de cette observation, nous utilisons l'hypothèse suivante pour concevoir un nouveau paradigme pour l'extraction non supervisée de relations:
+\begin{spacedblock}
+ \strong{Hypothèse distributionnelle faible sur le graphe d'extraction de relations.}
+ \emph{Deux arcs véhiculent des relations similaires s'ils ont des voisinages similaires.}
+\end{spacedblock}
+
+\begin{marginfigure}
+ \centering
+ \input{mainmatter/context/bert.tex}
+ \scaption[Schéma de \textsc{bert}, un modèle de langue masqué basé sur un \emph{transformer}.]{
+  Schéma de \textsc{bert} \parencite{bert}, un modèle de langue masqué basé sur un \emph{transformer}.
+ Le modèle est entrainé à reconstruire des mots \(\hat{w}_t\) corrompus en \(\tilde{w}_t\) (plongés en \(\tilde{\vctr{x}}_t\)). + \bertcoder{} est une spécialisation de ce modèle pour l'extraction de relations \parencite{mtb}. + \label{fig:french:bert} + } +\end{marginfigure} +\begin{marginparagraph} + \Textcite{gcn_spectral_semi} ont déjà tracé un parallèle entre \textsc{wl} et les approches à base de réseaux neuronaux convolutifs pour graphes (\textsc{gcn}). + Toutefois, nous avançons que les fonctions d'apprentissage habituellement utilisées pour les \textsc{gcn} ne sont pas adaptées au problème d'extraction non supervisée de relations. +\end{marginparagraph} + +Pour exploiter cette information de voisinage présente dans la topologie du multigraphe \(G\), nous proposons de nous inspirer du test d'isomorphisme de Weisfeiler--Leman (\textsc{wl}, \cite{weisfeiler-leman}). +Deux graphes sont dits isomorphes s'il existe un morphisme entre leur sommets qui conserve la relation de voisinage. +Ce concept est illustré par la Figure~\ref{fig:french:isomorphism}. +Nous pouvons donc traduire l'hypothèse ci-dessus par l'affirmation que si les voisinages de deux échantillons sont isomorphes, alors ces deux échantillons véhiculent la même relation. +Pour évaluer la proximité de deux voisinages, nous définissons \(\symfrak{S}_\gfnright(a, k)\), le plongement par \bertcoder{} (voir Figure~\ref{fig:french:bert}) de la sphère de rayon \(k\) autour de l'arête \(a\in\arcSet\) comme: +\begin{align*} + S_\gfnright(a, 0) & = \{\,a\,\} \\ + S_\gfnright(a, k) & = \{\,x\in\arcSet \mid \exists y\in S_\gfnright(a, k-1): \gfsource(x)=\gftarget(y)\,\} \\ + \symfrak{S}_\gfnright(a, k) & = \{\,\bertcoder(\gfsentence(x))\in\symbb{R}^d \mid x\in S_\gfnright(a, k)\,\}. +\end{align*} +Ces sphères correspondent au voisinage à distance \(k\). +À partir de celles-ci, nous pouvons définir une fonction de distance prenant en compte le voisinage jusqu'à une distance \(K\): +\begin{equation*} + d(a, a'; \vctr{\lambda}) = + \sum_{k=0}^K \frac{\lambda_k}{2} + \sum_{o\in\{\gfoleft, \gforight\}} + W_1\left(\symfrak{S}_o(a, k), \symfrak{S}_o(a', k)\right), +\end{equation*} +où \(W_1\) désigne la distance de Wasserstein d'ordre 1. +En particulier, cette fonction évaluée en \(\vctr{\lambda}=[1]\) correspond à la distance habituelle entre plongements de phrases modulo l'utilisation de \(W_1\) à la place d'une distance cosinus. +Pour des raisons de limites de calcul, nous fixons \(K=2\). +Dans ce cas, \(d(a_1, a_2, [1, 0]\transpose)\) correspond à la distance linguistique entre deux échantillons \(a_1, a_2\in\arcSet\), tandis que \(d(a_1, a_2, [0, 1]\transpose)\) correspond à la distance topologique entre les voisinages des échantillons \(a_1\) et \(a_2\). +Nous proposons de faire coïncider ces deux distances pour tirer parti de l'information mutuelle au voisinage et à la phrase afin d'identifier la sémantique relationnelle des échantillons. +Pour ce faire, nous introduisons une fonction de coût par triplet:% +\sidenote{\emph{triplet loss}} +\begin{equation*} + \loss{lt}(a_1, a_2, a_3) = \max\left( + \begin{aligned} + 0, \zeta & + + 2 \big(d(a_1, a_2, [1, 0]\transpose) - d(a_1, a_2, [0, 1]\transpose)\big)^2 \\ + & \hspace{5mm} - \big(d(a_1, a_2, [1, 0]\transpose) - d(a_1, a_3, [0, 1]\transpose)\big)^2 \\ + & \hspace{5mm} - \big(d(a_1, a_3, [1, 0]\transpose) - d(a_1, a_2, [0, 1]\transpose)\big)^2 + \end{aligned} + \right). 
+\end{equation*} +\begin{margintable} + \centering + \input{backmatter/french/graph quantitative.tex} + \scaption[Résultats quantitatifs des méthodes à base de graphe sur le jeu de données FewRel.]{ + Résultats quantitatifs des méthodes à base de graphe sur le jeu de données FewRel \parencite{fewrel}. + Ces résultats portent uniquement sur les échantillons de FewRel connectés par au moins une arête dans le graphe \(G\) du jeu de données \textsc{t-re}x. + } + \label{tab:french:graph quantitative} +\end{margintable} + +Des résultats préliminaires sur l'utilisation d'informations topologiques sont donnés dans la Table~\ref{tab:french:graph quantitative}. +Comme on pouvait s'y attendre, l'information relationnelle encodée dans le voisinage d'ordre 1 du graphe est moindre que celle directement contenue dans la phrase. +Toutefois, ces informations peuvent être combinées ce qui permet d'améliorer significativement la performance du modèle d'extraction de relation. diff --git a/backmatter/french/introduction.tex b/backmatter/french/introduction.tex @@ -0,0 +1,35 @@ +\begin{marginparagraph} + {\centering\includegraphics[width=0.9\marginparwidth]{frontmatter/Cheshire Cat.png}\\} + Le chat du Cheshire de \textcite{cat} vous fournit une expérience de \emph{félinité}. +\end{marginparagraph} + +\bigskip + +Le monde est doté d'une structure, qui nous permet de le comprendre. +Cette structure est en premier lieu apparente à travers la répétition de nos expériences sensorielles. +Parfois, nous voyons un chat, puis un autre chat. +Les entités émergent de la répétition de l'expérience de \emph{félinité} que nous avons ressentie. +De temps en temps, nous pouvons également observer un chat \textsl{à l'intérieur} d'un carton ou une personne \textsl{à l'intérieur} d'une pièce. +Les relations sont le mécanisme explicatif qui sous-tend ce deuxième type de répétition. +Une relation régit une interaction entre au moins deux objets. +Nous supposons qu'une relation \textsl{à l'intérieur} existe parce que nous avons vécu à plusieurs reprises la même interaction entre un conteneur et son contenu. +Le vingtième siècle a été traversé par le développement du structuralisme, qui considérait que les interrelations entre phénomènes étaient plus éclairantes que l'étude des phénomènes pris isolément. +En d'autres termes, nous pourrions mieux comprendre ce qu'est un chat en étudiant ses relations avec d'autres entités plutôt qu'en énumérant les caractéristiques de notre expérience de la \emph{félinité}. +De ce point de vue, le concept de relation est crucial dans notre compréhension du monde. + +\begin{marginparagraph}[-1cm] + Les relations --- quoique dans un sens plus restreint --- sont l'un des dix \emph{prédicaments} d'Aristote, les catégories d'objets d'appréhension humaine \parencite{sep_medieval_categories}. +\end{marginparagraph} + +Les langues naturelles saisissent la structure sous-jacente de ces répétitions à travers un processus que nous ne comprenons pas entièrement. +L'un des objectifs de l'intelligence artificielle, appelé compréhension du langage naturel, est d'imiter ce processus à l'aide d'algorithmes. +Puisque ce but nous échappe encore, nous nous efforçons d'en modéliser seulement des parties. +Cette thèse, suivant la perspective structuraliste, se concentre sur l'extraction des relations véhiculées par la langue naturelle. 
+En supposant que la langue naturelle est représentative de la structure sous-jacente des expériences sensorielles,% +\sidenote{ + Les répétitions d'expériences sensorielles et de mots n'ont pas à être nécessairement identiques. + Nous ne nous préoccupons ici que de la possibilité de résoudre les références. + Même si nos expériences d'arbres s'accompagnent généralement d'expériences d'écorces, les mots \frquote{arbre} et \frquote{écorce} ne cooccurrent pas aussi souvent dans des expressions en langue naturelle. + Cependant, leur relation méronymique est intelligible à la fois par l'expérience d'arbres et, entre autres, par l'utilisation de la préposition \frquote{de} dans les mentions écrites d'écorces. +} +nous devrions être en mesure de capturer les relations en exploitant uniquement les répétitions, c'est-à-dire de manière non supervisée. diff --git a/backmatter/french/title.tex b/backmatter/french/title.tex @@ -0,0 +1,18 @@ +\thispagestyle{empty}% +\begin{fullwidth} + \vspace{10mm} + \centering + \makeatletter + {\garamond\Huge\@title} + \makeatother + \par + \vspace{15mm} + \hfill Défendu par Étienne Simon + \par + \hfill sous la direction de Benjamin Piwowarski et Vincent Guigue + \par + \vspace{10mm} + Ce document est un résumé de la thèse en anglais disponnible à l'adresse suivante: \url{https://esimon.eu/PhD} + \par + \vspace{10mm} +\end{fullwidth}% diff --git a/french summary.tex b/french summary.tex @@ -0,0 +1,18 @@ +\documentclass[summary,digital]{thesis} + +\title{Apprentissage profond pour l'extraction non supervisée de relations} +\author{Étienne Simon} +\keywords{Apprentissage automatique\sep Apprentissage profond\sep Traitement automatique de la langue\sep Extraction d'information\sep Extraction de relations} + +\usepackage{thesis} +\addbibresource{thesis.bib} + +\begin{document} + \begin{otherlanguage}{french}% + \withmarginpar% + \input{backmatter/french/appendix} + \begin{fullwidth} + \printbibliography[heading=subbibintoc] + \end{fullwidth} + \end{otherlanguage}% +\end{document} diff --git a/frontmatter/Cheshire Cat.png b/frontmatter/Cheshire Cat.png Binary files differ. diff --git a/frontmatter/OuCuiPo.jpg b/frontmatter/OuCuiPo.jpg Binary files differ. diff --git a/frontmatter/Paris Quadrifolia.jpg b/frontmatter/Paris Quadrifolia.jpg Binary files differ. diff --git a/frontmatter/Ship of Theseus.jpg b/frontmatter/Ship of Theseus.jpg Binary files differ. 
diff --git a/frontmatter/abbreviations.tex b/frontmatter/abbreviations.tex @@ -0,0 +1,53 @@ +\chapter{List of Abbreviations} +\begin{longtable}[l]{@{}l l@{}} + \textsc{ace} & Automatic Content Extraction (Section~\ref{sec:datasets:ace}) \\ + \textsc{acl} & Association for Computational Linguistics \\ + \textsc{ari} & Adjusted Rand Index (Section~\ref{sec:relation extraction:clustering}) \\ + \textsc{bert} & Bidirectional Encoder Representations from Transformers (Section~\ref{sec:context:transformers}) \\ + \textsc{bpe} & Byte-Pair Encoding (Section~\ref{sec:context:bpe}) \\ + \textsc{bpr} & Bayesian Personalized Ranking (Section~\ref{sec:relation extraction:universal schemas}) \\ + \textsc{cnn} & Convolutional Neural Network (Section~\ref{sec:context:cnn}) \\ + \textsc{dae} & Denoising AutoEncoder (Section~\ref{sec:relation extraction:selfore}) \\ + \textsc{darpa} & Defense Advanced Research Projects Agency (Section~\ref{sec:datasets:muc}) \\ + \textsc{dec} & Deep Embedded Clustering (Section~\ref{sec:relation extraction:selfore}) \\ + \textsc{dipre} & Dual Iterative Pattern Relation Expansion (Section~\ref{sec:relation extraction:dipre}) \\ + \textsc{dirt} & Discovery of Inference Rules from Text (Section~\ref{sec:relation extraction:dirt}) \\ + \textsc{elbo} & Evidence Lower BOund (Section~\ref{sec:relation extraction:vae}) \\ + \textsc{elm}o & Embeddings from Language Model (Section~\ref{sec:context:elmo}) \\ + \textsc{epgnn} & Entity Pair Graph Neural Network (Section~\ref{sec:relation extraction:epgnn}) \\ + \textsc{fb} & FreeBase (Section~\ref{sec:datasets:freebase}) \\ + \textsc{gat} & Graph ATtention network (Section~\ref{sec:graph:spatial gcn}) \\ + \textsc{gcn} & Graph Convolutional Network (Section~\ref{sec:graph:related work}) \\ + \textsc{gnn} & Graph Neural Network (Section~\ref{sec:graph:related work}) \\ + \textsc{gis} & Generalized Iterative Scaling (Section~\ref{sec:relation extraction:hand-designed features}) \\ + \textsc{gpe} & Geo-Political Entity (Section~\ref{sec:relation extraction:hasegawa}) \\ + \textsc{gru} & Gated Recurrent Unit (Section~\ref{sec:context:lstm}) \\ + \textsc{idf} & Inverse Document Frequency (Section~\ref{sec:relation extraction:hasegawa}) \\ + \textsc{jsd} & Jensen--Shannon Divergence (Section~\ref{sec:fitb:variants}) \\ + \textsc{lda} & Latent Dirichlet Allocation (Section~\ref{sec:relation extraction:rellda}) \\ + \textsc{lsa} & Latent Semantic Analysis (Section~\ref{sec:context:word}) \\ + \textsc{lsi} & Latent Semantic Indexing (Section~\ref{sec:context:word}) \\ + \textsc{lstm} & Long Short-Term Memory (Section~\ref{sec:context:lstm}) \\ + \textsc{miml} & Multi-Instance Multi-Label (Section~\ref{sec:relation extraction:miml}) \\ + \textsc{mlm} & Masked Language Model (Section~\ref{sec:context:mlm}) \\ + \textsc{mtb} & Matching The Blanks (Sections~\ref{sec:relation extraction:mtb sentential} and~\ref{sec:relation extraction:mtb}) \\ + \textsc{muc} & Message Understanding Conference (Section~\ref{sec:datasets:muc}) \\ + \textsc{nlp} & Natural Language Processing (Sections~\ref{sec:context:word} and~\ref{sec:context:sentence}) \\ + \textsc{nce} & Noise Contrastive Estimation (Section~\ref{sec:context:nce}) \\ + \textsc{ner} & Named Entity Recognition (Chapter~\ref{chap:relation extraction}) \\ + \textsc{nist} & National Institute of Standards and Technology (Section~\ref{sec:datasets:ace}) \\ + \textsc{nmt} & Neural Machine Translation (Section~\ref{sec:context:attention}) \\ + \textsc{nyt} & New York Times (Section~\ref{sec:datasets:nyt}) 
\\ + \textsc{oie} & Open Information Extraction (Section~\ref{sec:relation extraction:oie}) \\ + \textsc{pcnn} & Piecewise Convolutional Neural Network (Section~\ref{sec:relation extraction:pcnn}) \\ + \textsc{pmi} & Pointwise Mutual Information (Section~\ref{sec:relation extraction:dirt}) \\ + \textsc{pos} & Part Of Speech (Figure~\ref{fig:relation extraction:dependency tree}) \\ + \textsc{ri} & Rand Index (Section~\ref{sec:relation extraction:clustering}) \\ + \textsc{rnn} & Recurrent Neural Network (Section~\ref{sec:context:rnn}) \\ + \textsc{svm} & Support Vector Machine (Section~\ref{sec:relation extraction:kernel}) \\ + \textsc{sgns} & Skip-Gram Negative Sampling (Section~\ref{sec:context:word2vec}) \\ + \textsc{tf} & Term Frequency (Section~\ref{sec:relation extraction:hasegawa}) \\ + \textsc{vae} & Variational AutoEncoder (Section~\ref{sec:relation extraction:vae}) \\ + \textsc{wl} & Weisfeiler--Leman isomorphism test (Section~\ref{sec:graph:weisfeiler-leman}) \\ + \textsc{wmt} & Workshop on statistical Machine Translation (Section~\ref{sec:context:history}) \\ +\end{longtable} diff --git a/frontmatter/abstract.tex b/frontmatter/abstract.tex @@ -0,0 +1,15 @@ +\chapter{Abstract} +Capturing concepts' interrelations is a fundamental of natural language understanding. +It constitutes a bridge between two historically separate approaches of artificial intelligence: the use of symbolic and distributed representations. +However, tackling this problem without human supervision poses several issues, and unsupervised models have difficulties echoing the expressive breakthroughs of supervised ones. +This thesis addresses two supervision gaps we identified: the problem of regularization of sentence-level discriminative models and the problem of leveraging relational information from dataset-level structures. + +\smallskip + +The first gap arises following the increased use of discriminative approaches, such as deep neural network classifiers, in the supervised setting. +These models tend to collapse without supervision. +To overcome this limitation, we introduce two relation distribution losses to constrain the relation classifier into a trainable state. +The second gap arises from the development of dataset-level (aggregate) approaches. +We show that unsupervised models can leverage a large amount of additional information from the structure of the dataset, even more so than supervised models. +We close this gap by adapting existing unsupervised methods to capture topological information using graph convolutional networks. +Furthermore, we show that we can exploit the mutual information between topological (dataset-level) and linguistic (sentence-level) information to design a new training paradigm for unsupervised relation extraction. 
diff --git a/frontmatter/acknowledgements.tex b/frontmatter/acknowledgements.tex @@ -0,0 +1,11 @@ +\chapter{Acknowledgements} +\e{Code couleur des commentaires:} +\begin{itemize} + \item \e{Moi} + \item \benj{Benjamin} + \item \vinc{Vincent} + \item \reu{Réunion} + \item \syr{Syrielle} +\end{itemize} + +\e{Trois autres couleurs sont régulièrement utilisées dans des figures: \tikz{\path[fill=Dark2-A] (0,0) rectangle (1.5mm,2mm);\path[fill=Dark2-B] (1.5mm,0) rectangle (3mm,2mm);\path[fill=Dark2-C] (3mm,0) rectangle (4.5mm,2mm);}, ces éléments ont vocation à être permanents.} diff --git a/frontmatter/gavagai 1.tex b/frontmatter/gavagai 1.tex @@ -0,0 +1 @@ +\tikz{\path[fill=black,scale=0.00025mm] svg {M6928 12784 c-117 -28 -201 -90 -239 -177 -29 -66 -35 -191 -45 -912 -9 -661 5 -895 66 -1085 26 -81 141 -317 210 -430 128 -210 311 -466 518 -723 138 -173 175 -252 202 -442 14 -102 12 -417 -5 -675 -8 -124 -19 -308 -25 -410 -14 -249 -47 -389 -107 -460 -33 -40 -48 -47 -123 -59 -135 -22 -276 -110 -464 -290 -149 -142 -209 -186 -266 -196 -61 -10 -92 1 -197 72 -220 148 -506 251 -933 337 -190 38 -314 49 -565 49 -475 0 -990 -73 -1400 -198 -172 -53 -176 -54 -296 -115 -606 -306 -1819 -1412 -2047 -1866 -94 -187 -181 -537 -232 -930 -53 -414 -80 -970 -61 -1239 13 -182 9 -238 -20 -297 -47 -92 -122 -77 -232 47 -80 91 -186 128 -266 95 -25 -10 -37 -31 -84 -143 -79 -187 -107 -290 -107 -392 0 -85 18 -191 46 -277 l16 -48 -88 -82 c-103 -98 -171 -186 -180 -235 -15 -78 41 -169 231 -378 65 -71 157 -172 203 -223 231 -254 663 -535 896 -582 35 -7 84 -9 117 -6 101 12 361 7 480 -9 167 -22 260 -48 388 -111 201 -98 430 -168 701 -213 190 -32 252 -37 980 -86 1177 -78 1363 -88 1655 -88 256 -1 287 1 356 20 104 29 184 71 264 139 172 146 170 206 -21 467 -154 210 -183 290 -126 347 39 39 83 49 257 61 83 5 172 14 198 19 26 6 137 51 245 101 250 115 381 160 488 167 109 6 151 -12 230 -104 129 -150 194 -208 294 -258 123 -63 215 -122 430 -275 365 -261 522 -348 689 -384 85 -18 129 -21 381 -20 357 0 666 32 907 94 295 75 375 251 203 450 -46 54 -156 131 -465 324 -469 293 -556 369 -614 538 -55 158 -29 292 78 410 76 84 147 118 340 161 270 61 391 133 495 293 107 167 152 331 166 620 15 303 49 467 124 594 34 58 66 147 85 234 42 203 65 609 41 736 -15 77 -61 169 -134 268 -94 127 -109 162 -104 236 6 74 34 131 96 197 66 71 103 87 230 99 316 29 802 139 957 216 91 46 241 212 430 475 235 328 233 325 247 371 7 23 12 89 12 147 -1 84 -6 121 -26 182 -49 150 -131 265 -239 339 -121 82 -164 146 -263 392 -81 200 -124 278 -223 409 -48 64 -151 217 -227 340 -289 464 -353 538 -546 637 -245 126 -491 197 -738 211 -152 9 -195 22 -218 68 -10 20 -21 158 -54 682 -17 280 -138 731 -341 1277 -147 396 -341 815 -493 1067 -76 127 -96 150 -151 179 -79 41 -291 17 -433 -50 -97 -45 -128 -136 -143 -408 -11 -225 -16 -259 -38 -286 -53 -65 -165 55 -366 390 -93 155 -166 250 -273 353 -105 101 -213 175 -310 212 -139 53 -306 69 -424 40z};}% diff --git a/frontmatter/gavagai 2.tex b/frontmatter/gavagai 2.tex @@ -0,0 +1 @@ +\tikz[yscale=-1]{\path[fill=black,scale=0.035mm] svg {m 183.49414,115.23047 c -3.81098,12.24179 12.84968,19.95604 11.57001,24.05752 -6.64714,2.5589 -13.99937,-1.18702 -20.98407,0.87803 -5.72124,-0.38319 -11.32489,1.02518 -16.43165,3.53261 -6.1361,1.98224 -10.16977,7.1655 -14.86379,11.22545 -3.46421,1.56846 -4.48943,6.21694 -6.80417,7.97826 -3.67223,-0.51131 -12.52401,-0.30975 -7.96875,6.32032 1.22056,2.38168 5.74481,6.32356 7.58984,4.56054 -2.37787,2.85489 -4.76622,1.41759 -6.31186,4.92149 -2.09973,4.18627 -2.54421,9.11039 -4.5065,13.24648 -0.52992,2.95364 0.25025,11.14353 
3.95703,9.28711 7.27436,-4.48497 1.7424,-19.62159 19.27555,-18.14501 4.56831,-0.43332 7.78862,-4.46481 11.285,-7.07374 2.72037,-0.83502 5.98898,-2.79333 8.30898,-4.0823 5.21052,-2.79278 23.44071,-5.8845 25.03713,-4.95161 5.38688,1.5269 11.2458,0.98745 16.5212,3.17114 2.70776,1.45774 11.19537,5.67286 10.5537,-0.10422 -0.34727,-2.5038 -6.75528,-5.51779 -1.17781,-4.02164 3.8735,0.54707 9.87288,4.05325 12.26266,-0.44106 -0.19132,-4.86561 -5.18125,-1.92181 -7.51391,-3.49931 -2.53108,-0.93673 -3.59218,-1.72059 -6.80358,-2.42259 -5.9747,-0.69802 -0.9583,-5.04789 2.5874,-4.86923 5.88263,-0.28877 4.05615,-8.50343 1.56939,-11.28477 -2.49124,-5.22101 -7.8256,-8.04172 -13.19477,-9.428 -2.34911,-4.86553 -4.26139,-10.07952 -7.70373,-14.28498 -2.40041,-2.34951 -8.0586,-4.47652 -10.11463,-2.27948 -1.07473,0.35299 -4.41575,-7.67208 -6.13867,-2.29101 z};}% diff --git a/frontmatter/introduction.tex b/frontmatter/introduction.tex @@ -0,0 +1,255 @@ +\chapter{Introduction} +\begin{onehalfspace} +The world is endowed with a structure, which enables us to understand it. +This structure is most apparent through repetitions of sensory experiences. +Sometimes, we can see a cat, then another cat. +Entities emerge from the repetition of catness we experienced. +From time to time, we can also observe a cat \textsl{inside} a cardboard box or a person \textsl{inside} a room. +\begin{marginparagraph} + Relations---albeit in a more restrictive sense---are one of Aristotle's ten \emph{praedicamenta}, the categories of objects of human apprehension \parencite{sep_medieval_categories}. +\end{marginparagraph} +Relations are the explanatory device underlying this second kind of repetition. +A relation governs an interaction between two or more objects. +We assume an \textsl{inside} relation exists because we repeatedly experienced the same interaction between a container and its content. +The twentieth century saw the rise of structuralism, which regarded the interrelations of phenomena as more enlightening than the study of phenomena in isolation. +In other words, we might better understand what a cat is by studying its relationships to other entities rather than by listing the characteristics of catness. +From this point of view, the concept of relation is crucial to our understanding of the world. + +\begin{marginparagraph} + \includegraphics[width=\marginparwidth]{frontmatter/Cheshire Cat.png} + The Cheshire Cat from \textcite{cat} provides you with an experience of catness. +\end{marginparagraph} + +Natural languages capture the underlying structure of these repetitions through a process we do not fully understand. +One of the endeavors of artificial intelligence, called natural-language understanding, is to mimic this process with definite algorithms. +Since the aforementioned goal is still elusive, we strive to model only parts of this process. +This thesis, consequent to the structuralist perspective, focuses on extracting relations conveyed by natural language. +Assuming natural language is representative of the underlying structure of sensory experiences,% +\sidenote{ + The repetitions of sensory experiences and words need not be alike. + We are only concerned with the possibility of resolving references here. + Even though our experiences of trees are more often than not accompanied by experiences of bark, the words ``tree'' and ``bark'' do not co-occur as often in natural language utterances.
+ However, their meronymic relationship is understandable both through experiences of trees and inter alia through the use of the preposition ``of'' in textual mentions of barks. +} +we should be able to capture relations through the exploitation of repetitions alone---i.e.\ in an unsupervised fashion. + +Extracting relations can help better our understanding of how languages work. +For example, whether languages can be understood through a small amount of data is still a somewhat open question in linguistics. +The poverty of the stimulus argument states that children should not be able to acquire proficiency from being exposed to so little data. +It is one of the major arguments in favor of the controversial universal grammar theory. +Capturing relations from nothing more than a small number of natural language utterances would be a step towards disproving the poverty of the stimulus claim. + +This kind of incentive for tackling the relation extraction problem stems from an \emph{episteme}% +\sidenote{From the Ancient Greek \foreignlanguage{greek}{ἐπιστήμη}: knowledge, know-how.} +endeavor. +However, most of the traction for this problem stems from a \emph{techne}% +\sidenote{From the Ancient Greek \foreignlanguage{greek}{τέχνη}: craft, art.} +undertaking. +The end goal is to build a system with real-world applications. +Under this perspective, the point of artificial intelligence is to replace or assist humans on specific tasks. +Most tasks of interest necessitate some form of technical knowledge (e.g.\ diagnosing a disease requires knowledge of the relationship between symptoms and diseases). +The principal vector of knowledge is language (e.g.\ through education). +Thus, knowledge acquisition from natural language is fundamental for systems aiming at such applications. + +For an analysis of the real-world impact of systems extracting knowledge from text, refer to \textcitex{assisted_curation}. +Their article shows that human curators can use a machine learning system to better extract a set of protein--protein interactions from biomedical literature. +This is clearly a \emph{techne} endeavor: the protein--protein interactions are not new knowledge, as they are already published; however, the system improves the work of the human operator. + +\begin{epigraph} + {Willard Van Orman Quine} + {\citetitle{quine_two_dogma}} + {\cite*{quine_two_dogma}} + [][-26mm] + Once the theory of meaning is sharply separated from the theory of reference, it is a short step to recognizing as the business of the theory of meaning simply the synonymy of linguistic forms and the analyticity of statements; meanings themselves, as obscure intermediary entities, may well be abandoned. +\end{epigraph} + +This example of application is revealing of the larger problem of information explosion. +The quantity of published information has grown relentlessly throughout the last decades. +Machine learning can be used to filter or aggregate this large amount of data. +In this case, the object of interest is not the text in itself but the conveyed semantics, its meaning. +This raises the question: how to define the meaning we are seeking to process? +Indeed, foundational theories of meaning are the object of much discussion in the philosophy community \parencite{sep_meaning}. +While some skeptics, like Quine, do not recognize meaning as a concept of interest, they reckon that a minimal description of meaning should at least encompass the recognition of synonymy.
+This follows from the above discussion about the recognition of repetitions: if \input{frontmatter/gavagai 1.tex} is a repetition of \input{frontmatter/gavagai 2.tex}, we should be able to say that \input{frontmatter/gavagai 1.tex} and \input{frontmatter/gavagai 2.tex} are synonymous. +In practice, this implies that we ought to be able to extract classes of linguistic forms with the same meaning or referent---the difference between the two is not relevant to our problem. + +\begin{marginparagraph}[-47mm] + \includegraphics[width=\marginparwidth]{frontmatter/Paris Quadrifolia.jpg} + Paris (\wdent{162121}) is neither the capital of France nor the prince of Troy; it is the genus of the true lover's knot plant. + The capital of France would be Paris (\wdent{90}) and the prince of Troy, son of Priam, Paris (\wdent{167646}). + Illustration from \textcite{paris_quadrifolia}. + \label{margin:introduction:paris quadrifolia} +\end{marginparagraph} + +While the above discussion of meaning is essential to define our objects of interest, relations, it is important to note that we work on language; we want to extract relations from language, not from repetitions of abstract entities. +Yet, the mapping between linguistic signifiers and their meaning is not bijective. +We can distinguish two kinds of misalignment between the two: either two expressions refer to the same object (synonymy), or the same expression refers to different objects depending on the context in which it appears (homonymy). +The first variety of misalignment is the most common one, especially at the sentence level. +For example, ``Paris is the capital of France'' and ``the capital of France is Paris'' convey the same meaning despite having different written and spoken forms. +On the other hand, the second kind is principally visible at the word level. +For example, the preposition ``from'' in the phrases ``retinopathy from diabetes'' and ``Bellerophon from Corinth'' conveys either a \textsl{has effect} relationship or a \textsl{birthplace} one. +To distinguish these two uses of ``from,'' we can use relation identifiers such as \wdrel{1542} for \textsl{has effect} and \wdrel{19} for \textsl{birthplace}. +An example with entity identifiers---which are meant to uniquely identify entity concepts---is provided in the margin of page~\pageref{margin:introduction:paris quadrifolia}. + +\begin{marginparagraph}[-2cm] + Throughout this thesis, we will be using Wikidata identifiers (\url{https://www.wikidata.org}) to index entities and relations. + Entity identifiers start with \texttt{Q}, while relation identifiers start with \texttt{P}. + For example, \wdent{35120} is an entity. +\end{marginparagraph} + +While the preceding discussion makes it seem as if all objects can fit nicely into clearly defined concepts, in practice, this is far from the truth. +Early in the knowledge-representation literature, \textcite{is-a_analysis} remarked on the difficulty of clearly defining even seemingly simple relations such as \textsl{instance of} (\wdrel{31}). +This problem ensues from the assumption that synonymy is transitive, and therefore, induces equivalence classes. +This assumption is fairly natural since it already applies to the link between language and its references: even though two cats might be very unlike one another, we still group them under the same signifier. +However, language is flexible.
+When trying to capture the entity ``cat,'' it is not entirely clear whether we should group ``a cat with the body of a cherry pop tart'' with regular experiences of catness.% +\sidenote{The reader who would describe this as a cat is invited to replace various body parts of this imaginary cat with food items until they stop experiencing catness.} +To circumvent this issue, some recent works \parencite{fewrel} on the relation extraction problem define synonymy as a continuous intransitive association. +Instead of grouping linguistic forms into clear-cut classes with a single meaning, they extract a similarity function defining how similar two objects are. + +Now that we have conceptualized our problem, let us focus on our proposed technical approach. +First, to summarize, this thesis focuses on unsupervised relation extraction from text.% +\sidenote[][-11mm]{We use text as it is the most definite and easy-to-process rendition of language.} +Since relations are objects capturing the interactions between entities, our task is to find the relation linking two given entities in a piece of text. +For example, in the following three samples, where entities are underlined: +\begin{marginparagraph}[-11mm] + \includegraphics[width=\marginparwidth]{frontmatter/Ship of Theseus.jpg} + Ariadne waking on the shore of Naxos where she was abandoned, wall painting from Herculaneum in the collection of \textcite{ship_of_theseus}. + The ship in the distance can be identified as the ship of Theseus, for now. + Depending on the philosophical view of the reader (\wdent{1050837}), its identity as the ship of Theseus might not linger for long. +\end{marginparagraph} +\begin{indentedexample} + \uhead{Megrez} is a star in the northern circumpolar constellation of \utail{Ursa Major}. + \smallskip + + \uhead{Posidonius} was a Greek philosopher, astronomer, historian, mathematician, and teacher native to \utail{Apamea, Syria}. + \smallskip + + \uhead{Hipparchus} was born in \utail{Nicaea, Bithynia}, and probably died on the island of Rhodes, Greece. +\end{indentedexample} +we wish to find that the last two sentences convey the same relation---in this case, \sfTripletHolds{e_1}{born in}{e_2} (\wdrel{19})---or at the very least, following the discussion in the preceding paragraph about the difficulty of defining clear relation classes, we wish to find that the relations conveyed by the last two samples are closer to each other than the one conveyed by the first sample. +We propound that this can be performed by machine learning algorithms. +In particular, we study how to approach this task using deep learning. +While relation extraction can be tackled as a standard supervised classification problem, labeling a dataset with precise relations is a tedious task, especially with technical documents such as the biomedical literature studied by \textcite{assisted_curation}. +Another problem commonly encountered by annotators is the question of the applicability of a relation: for example, should ``the \uhead{country}'s founding \utail{father}'' be labeled with the \textsl{product--producer} relation?% +\sidenote{ + The annotator of this sentence piece in the SemEval~2010 Task~8 dataset (Section~\ref{sec:datasets:semeval}) decided that it does convey the \textsl{product--producer} relation. + The difficulty of applying a definition is an additional argument in favor of similarity-function-based approaches over classification approaches.
+} +We now discuss how deep learning became the most promising technique to tackle natural language processing problems. + +The primary subject matter of the relation extraction problem is language. +Natural language processing (\textsc{nlp}) was already a prominent research interest in the early years of artificial intelligence. +This can be seen from the \emph{episteme} viewpoint in the seminal paper of \textcitex{turing_test}. +This paper proposes mastery of language as evidence of intelligence, in what is now known as the Turing test. +Language was also a subject of interest for \emph{techne} objectives.% +\begin{epigraph} + {Leon Dostert} + {``701~translator'' \textsc{ibm} press release} + {1954} + Five, perhaps three years hence, interlingual meaning conversion by electronic process in important functional areas of several languages may well be an accomplished fact. +\end{epigraph} +In January 1954, the Georgetown--\textsc{ibm} experiment tried to demonstrate the possibility of translating Russian into English using computers \parencite{georgetown-ibm}. +The experiment showcased the translation of sixty sentences using a bilingual dictionary to translate words individually and six kinds of grammatical rules to reorder tokens as needed. +Initial experiments created an expectation buildup, which was followed by an unavoidable disappointment, resulting in an ``\textsc{ai} winter'' where research funding was restricted. +While translating word-by-word is somewhat easy in most cases, translating whole sentences is a lot harder. +Scaling up the set of grammatical rules in the Georgetown--\textsc{ibm} experiment proved impractical. +This limitation was not a technical one. +With the improvement of computing machinery, more rules could have easily been encoded. +One of the issues identified at the time was the commonsense knowledge problem \parencite{commonsense}. +In order to translate or, more generally, process a sentence, it needs to be understood in the context of the world in which it was uttered. +Simple rewriting rules cannot capture this process.% +\sidenote[][-32mm]{ + Furthermore, grammar is still an active area of research. + We do not perfectly understand the underlying reality captured by most words and are thus unable to write down complete formal rules for their usages. + For example, \textcite{over_grammar} is a 43-page cognitive linguistics paper attempting to explain the various uses of the English preposition ``over.'' + This is one of the arguments for unsupervised approaches; we should avoid hand-labeled datasets if we want to outperform the human annotators. +} +In order to handle whole sentences, a paradigm shift was necessary. + +A first shift occurred in the 1990s with the advent of statistical \textsc{nlp} \parencite{statistical_methods}. +This evolution can be partly attributed to the increase in computational power, but also to the progressive abandonment of essentialist linguistics precepts% +\sidenote{ + Noam Chomsky, one of the most---if not the most---prominent essentialist linguists, considers that manipulating probabilities of text excerpts is not the way to acquire a better understanding of language. + Following the success of statistical approaches, he only recognized statistical \textsc{nlp} as a \emph{techne} achievement. + For an answer to this position, see \textcite{statistical_methods, norvig_chomsky}. +} +in favor of distributionalist ones.
+Instead of relying on human experts to input a set of rules, statistical approaches leveraged the repetitions in large text corpora to infer these rules automatically. +Therefore, this progression can also be seen as a transition away from symbolic artificial intelligence models and towards statistical ones. +Coincidentally, the relation extraction task was formalized at this time. +And while the earliest approaches were based on symbolic models using handwritten rules, statistical methods quickly became the norm after the 1990s. +However, statistical \textsc{nlp} models still relied on linguistic knowledge. +The relation extraction systems were usually split into a first phase of hand-specified linguistic feature extraction and a second phase where a relation was predicted based on these features using shallow statistical models. + +\tatefix{3mm}{5mm}{6mm} +\begin{cjkepigraph}[\traditionalChinese]{45mm} + {\begin{epigraphcontent}[35mm] + {} + {``Gongsun Longzi'' Chapter~2} + {circa~300~\textsc{bce}} + White horse is not horse. + \end{epigraphcontent}} + [% + A well-known paradox in early Chinese philosophy illustrating the difficulty of clearly defining the meaning conveyed by natural languages. + This paradox can be resolved by disambiguating the word ``horse.'' + Does it refer to the ``whole of all horse kind'' (the mereological view) or to ``horseness'' (the Platonic view)? + The mereological interpretation was famously---and controversially---introduced by \textcite{hansen_mass_noun_hypothesis}; see \textcite{chinese_ontology} for a discussion of early Chinese ontological views of language. + ] + 白馬非馬 +\end{cjkepigraph} + +A second shift occurred in the 2010s when deep learning approaches erased the split between feature extraction and prediction. +Deep learning models are trained to directly process raw data, in our case text excerpts. +To achieve this feat, neural networks able to approximate any function are used. +However, the downside of these models is that they usually require large amounts of labeled data to be trained. +This is a particularly salient problem throughout this thesis since we deal with an unsupervised problem. +As the latest and most efficient technique available, deep learning proved to be a natural choice to tackle relation extraction. +However, this natural evolution came with serious complications that we try to address in this manuscript. + +\begin{marginparagraph} + \includegraphics[width=\marginparwidth]{frontmatter/OuCuiPo.jpg} + Frontispiece of the OuCuiPian Library by \textcite{oucuipo}. + A different kind of cooking with letters. +\end{marginparagraph} + +The evolution of unsupervised relation extraction methods closely follows that of the \textsc{nlp} methods described above. +The first deep learning approach was that of \textcite{vae_re}. +However, only part of their model relied on deep learning techniques: the extraction of features was still done manually. +The reason why feature extraction could not be done automatically as is standard in deep learning approaches is closely related to the unsupervised nature of the problem. +Our first contribution is to propose a technique to enable the training of unsupervised fully-deep learning relation extraction approaches. +Afterward, different ways to tackle the relation extraction task emerged. +First, recent approaches use a softer definition of relations by extracting a similarity function instead of a classifier.
+Second, they consider a broader context: instead of processing each sentence individually, the global consistency of extracted relations is considered. +However, this second approach was mostly limited to the supervised setting, with limited use in the unsupervised setting. +Our second contribution concerns using this broader context for unsupervised relation extraction, in particular for approaches defining a similarity function. +During the preparation of the thesis, we also published an article on multimodal semantic role labeling with Syrielle Montariol and her team \parencite{mmsrl}; since it is somewhat unrelated to unsupervised relation extraction, we do not include it in this thesis. +\begin{marginparagraph} + Syrielle Montariol,\textsuperscript{*} Étienne Simon,\textsuperscript{*} Arij Riabi, Djamé Seddah. \citefield{mmsrl}[linkedtitle]{title} \citefield{mmsrl}{shortseries}~\cite*{mmsrl} + + \raggedleft\scriptsize\textsuperscript{*}\,Equal contributions +\end{marginparagraph} + +We now describe the organization of the thesis. +Chapter~\ref{chap:context} provides the necessary background for using deep learning to tackle the relation extraction problem. +In particular, we focus on the concept of distributed representation, first of language, then of entities and relations. +Chapter~\ref{chap:relation extraction} formalizes the relation extraction task and presents the evaluation framework and relevant related works. +This chapter focuses first on supervised relation extraction using local information only, then on aggregate extraction, which exploits repetitions more directly, before delving into unsupervised relation extraction. +In Chapter~\ref{chap:fitb}, we propose a solution to train deep relation extraction models in an unsupervised fashion. +The problem we tackle is a stability problem between a powerful universal approximator and a weak supervision signal transpiring through the repetitions in the data. +This chapter was the object of a publication at \textsc{acl} \parencite{fitb}. +\begin{marginparagraph} + \hbadness=8000% Can't do better… :'( + Étienne Simon, Vincent Guigue, Benjamin Piwowarski. \citefield{fitb}[linkedtitle]{title} \citefield{fitb}{shortseries}~\cite*{fitb} +\end{marginparagraph} +Chapter~\ref{chap:graph} explores the methods to exploit the structure of the data more directly through the use of graph-based models. +\begin{marginparagraph} + The work presented in Chapter~\ref{chap:graph} still needs to be polished with more experimental work and is yet unpublished at the time of writing. +\end{marginparagraph} +In particular, we draw parallels with the Weisfeiler--Leman isomorphism test to design new methods using topological (dataset-level) and linguistic (sentence-level) features jointly. +Appendix~\ref{chap:french} contains the state-mandated thesis summary in French. +The other appendices provide valuable information that can be used as references. +We strongly encourage the reader to refer to them for additional details on the datasets (Appendix~\ref{chap:datasets}), but even more so for the list of assumptions made by relation extraction models (Appendix~\ref{chap:assumptions}). +These modeling hypotheses are central to the design of unsupervised approaches. +In addition to their definition and reference to the introducing section, Appendix~\ref{chap:assumptions} provides counterexamples, which might help the reader understand the nature of these assumptions. 
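As a schematic restatement of the similarity-based objective on the three samples given earlier in this introduction (a sketch only: the sentence encoder \(f\), which maps a sentence with two marked entities to a vector, and the similarity function \(\operatorname{sim}\), for instance a cosine similarity, are placeholders rather than the models defined later), writing \(s_1\), \(s_2\) and \(s_3\) for the Megrez, Posidonius and Hipparchus samples, an unsupervised extractor should satisfy \[\operatorname{sim}\big(f(s_2), f(s_3)\big) > \operatorname{sim}\big(f(s_1), f(s_2)\big) \qquad\text{and}\qquad \operatorname{sim}\big(f(s_2), f(s_3)\big) > \operatorname{sim}\big(f(s_1), f(s_3)\big),\] since the last two samples convey the same \textsl{born in} relation while the first does not. The task and its evaluation are formalized in Chapter~\ref{chap:relation extraction}.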
+\end{onehalfspace} diff --git a/frontmatter/notation.tex b/frontmatter/notation.tex @@ -0,0 +1,72 @@ +\chapter{Notation} +Most of this thesis is formatted in one and a half columns, which means that a large right margin is filled with complementary material. +This includes figures, tables and algorithms when space allows, but also epigraphs and marginal notes with supplementary details and comments. +The titles of important bibliographical references are also given in the margin right of their first mention in the section. +Some marginal paragraphs are left unnumbered and provide material about the broadly adjacent passage. +When a section seems unclear, we invite the reader to look for additional information in the margin. +For example, while relation algebra is introduced in Section~\ref{sec:context:relation algebra}, we do not expect most readers to be familiar with its notation. +As such, we will systematically provide an interpretation of relation algebra formulae in plain English in unnumbered marginal paragraphs. + +\bigskip + +\newlength{\notationsWidest} +\settowidth{\notationsWidest}{\(\jsd(P\mathrel{\|}Q)\)} +\begin{longtable}{@{}c p{\dimexpr\textwidth-\tabcolsep*2-\notationsWidest\relax}@{}} + \multicolumn{2}{@{}c@{}}{\textbf{Domain of Variables}} \\ + \(x\) & A scalar \\ + \(\vctr{x}\) & A vector, its elements are indexed \(x_i\) \\ + \(\mtrx{X}\) & A matrix, its rows are indexed \(\vctr{x}_i\), its elements \(x_{ij}\) \\ + \(\tnsr{X}\) & A (three-way) tensor, indexed \(\mtrx{X}_i\), \(\vctr{x}_{ij}\), \(x_{ijk}\) \\ + \(\rndm{x}\) & A random variable (sometimes \(\rndm{X}\) to avoid confusion) \\ + \(\rndmvctr{x}\) & A random vector \\ + \(\symbb{R}\) & The set of real numbers \\ + \(\symbb{R}^n\) & The set of real-valued vectors of length \(n\) \\ + \(\symbb{R}^{n\times m}\) & The set of real-valued matrices with \(n\) rows and \(m\) columns \\ + \(B^A\) & The set of functions from \(A\) to \(B\), in particular \(2^A\) denotes the power set of \(A\) \\ + \multicolumn{2}{@{}b{\textwidth}@{}}{ + To describe the set of real-valued vectors with the same number of elements as a set \(A\), we abuse the morphism from the functions \(\symbb{R}^A\) to the vectors \(\symbb{R}^{|A|}\) and simply write \(\vctr{x}\in\symbb{R}^A\) to denote that \(\vctr{x}\) is a vector with \(|A|\) elements. 
+ } \\[5mm] + \multicolumn{2}{@{}c@{}}{\textbf{Relation Algebra}} \\ + \multicolumn{2}{@{}l@{}}{Relation algebra is described in more detail in Section~\ref{sec:context:relation algebra}.} \\ + \(\relationZero\) & Empty relation \\ + \(\relationOne\) & Complete relation \\ + \(\relationIdentity\) & Identity relation \\ + \(\bar{r}\) & Complementary relation \\ + \(\breve{r}\) & Converse relation (reversed orientation), when applied to a surface form: \(\widebreve{\textsl{born in}}\) \\ + \(\relationComposition\) & Relation composition \\[5mm] + \multicolumn{2}{@{}c@{}}{\textbf{Probability and Information Theory}} \\ + \(P(\rndm{x})\), \(Q(\rndm{x})\) & Probability distribution over \(\rndm{x}\), by default we heavily overload \(P\) (as is customary), when confusion is possible we disambiguate by using \(Q\) \\ + \(\empP(\rndm{x})\) & Empirical distribution over \(\rndm{x}\) (as defined by the dataset) \\ + \(\rndm{x} \independent \rndm{y} \mid \rndm{z}\) & Conditional independence of \(\rndm{x}\) and \(\rndm{y}\) given \(\rndm{z}\) \\ + \(\rndm{x} \notindependent \rndm{y}\) & \(\rndm{x}\) and \(\rndm{y}\) are not independent \\ + \(\uniformDistribution(X)\) & Uniform distribution over the set \(X\) \\ + \(\normalDistribution(\mu, \sigma^2)\) & Normal distribution of mean \(\mu\) and variance \(\sigma^2\) (also used for the multivariate case) \\ + \(\entropy(\rndm{x})\) & Shannon entropy of the random variable \(\rndm{x}\), \(\entropy(\rndm{x}, \rndm{y})\) denotes the joint entropy \\ + \(\entropy(\rndm{x}\mid\rndm{y})\) & Conditional entropy of \(\rndm{x}\) given \(\rndm{y}\) \\ + \(\entropy_Q(P)\) & Cross-entropy of \(P\) relative to \(Q\) \\ + \(\operatorname{I}(\rndm{x}; \rndm{y})\) & Mutual information of \(\rndm{x}\) and \(\rndm{y}\) \\ + \(\pmi(x, y)\) & Pointwise mutual information of events \(x\) and \(y\) \\ + \(\kl(P\mathrel{\|}Q)\) & Kullback--Leibler divergence from \(Q\) to \(P\) \\ + \(\jsd(P\mathrel{\|}Q)\) & Jensen--Shannon divergence between \(P\) and \(Q\) \\ + \(W_1(P, Q)\) & 1-Wasserstein distance between \(P\) and \(Q\) \\[5mm] + \multicolumn{2}{@{}c@{}}{\textbf{Machine Learning}} \\ + \(\sigmoid(x)\) & Logistic sigmoid \(\sigmoid(x) = 1 \divslash (1 + \exp(-x))\) \\ + \(\ReLU(x)\) & Rectified linear unit \(\ReLU(x) = \max(0, x)\), we use \(\ReLU_{\halfCircleScript}\) to refer to the ReLU activation applied to half of the units (see Section~\ref{sec:context:attention lm}) \\ + \(\symcal{L}\) & Loss (to be minimized) \\ + \(J\) & Objective (to be maximized) \\ + \(\overDirected{\fone}\), \(\overUndirected{\fone}\), \(\overHalfdirected{\fone}\) & Directed, undirected and half-directed \fone{} measures (see Section~\ref{sec:relation extraction:supervised evaluation}) \\[5mm] + \multicolumn{2}{@{}c@{}}{\textbf{Graph Operations}} \\ + \(\gfsource(a)\) & Source vertex of the arc \(a\) \\ + \(\gftarget(a)\) & Target vertex of the arc \(a\) \\ + \(\gfrelation(a)\) & Relation conveyed by the arc \(a\) \\ + \(\gfsentence(a)\) & Sentence corresponding to the arc \(a\) \\ + \(\gfneighbors(e)\) & Vertices neighboring the vertex \(e\) \\ + \(\gfincidents(e)\) & Arcs incident to the vertex \(e\) \\ + \(\gfeneighbors(a)\) & Arcs neighboring the arc \(a\) \\[5mm] + \multicolumn{2}{@{}c@{}}{\textbf{Other Operations}} \\ + \(\odot\) & Element-wise (Hadamard) product \\ + \(*\) & Convolution \\ + \(\bowtie\) & Natural join \\ + \(\times_A\) & Pullback with common codomain \(A\) \\ + \(\delta_{i,j}\) & Kronecker's delta, 1 if \(i=j\), 0 otherwise \\ +\end{longtable} diff --git 
a/frontmatter/thèse.tex b/frontmatter/thèse.tex @@ -0,0 +1,29 @@ +% I like the title page of old French theses, as such I decided to borrow the word "THÈSE" from the theses of famous doctors. +\begin{tikzpicture}[letter/.style={fill=black}, scale=0.12, yscale=-1] + % Henri Poincaré (1879) + %\path[letter] svg {M-1828.8,327.6h-28.2v-80.1c2,0,4.1,0,6.5,0c31.5,0,34.1,35.1,34.1,35.1h4.7v-75.2h-4.3c-3,30.4-30.6,33.6-41.1,33.7v-81.6 h26.2c39.5,0,49.6,53.6,49.6,53.6h5.2v-53.6V154h-141.7v5.4h22.4v168.2h-22.4v5.4h146.7v-5.4v-55.8h-4.8 C-1775.7,271.7-1786.3,327.6-1828.8,327.6z}; + %\path[letter] svg {M-2022.5,159.3 -2000.1,159.3 -2000.1,240.1 -2053.5,240.1 -2053.5,159.3 -2032.9,159.3 -2032.9,154 -2114.2,154 -2114.2,159.3 -2091.8,159.3 -2091.8,327.6 -2114.2,327.6 -2114.2,332.9 -2032.9,332.9 -2032.9,327.6 -2053.5,327.6 -2053.5,246 -2000.1,246 -2000.1,327.6 -2022.5,327.6 -2022.5,332.9 -1941.2,332.9 -1941.2,327.6 -1961.8,327.6 -1961.8,159.3 -1941.2,159.3 -1941.2,154 -2022.5,154z}; + %\path[letter] svg {M-1520.4,327.6h-28.2v-80.1c2,0,4.1,0,6.5,0c31.5,0,34.1,35.1,34.1,35.1h4.7v-75.2h-4.3c-3,30.4-30.6,33.6-41.1,33.7v-81.6 h26.2c39.5,0,49.6,53.6,49.6,53.6h5.2v-53.6V154h-141.7v5.4h22.4v168.2h-22.4v5.4h146.7v-5.4v-55.8h-4.8 C-1467.4,271.7-1478,327.6-1520.4,327.6z}; + %\path[letter] svg {M-1834.4,150.7c2.7,0.3,3.3-1.6,3.3-1.6c-17.8-7-23.4-31.5-23.4-32c0-0.5-5.6-6.6-12.9,1.6 C-1855.1,144.5-1837.1,150.4-1834.4,150.7z}; + %\path[letter] svg {M-2278.6,159.3v54h4.4c0,0,6-54,43.2-54h5.6v168.2h-25.2v5.4h86.9v-5.4h-23.5V159.3h3.9c37.3,0,43.2,54,43.2,54h4.4v-54 V154h-143.1V159.3z}; + %\path[letter] svg {M-1700.3,215.1c-29.8-14.2-26.3-44.5-11.8-57.4c14.5-12.9,40.9-8.4,52.2,4.6c11.4,13,16.3,41.9,16.3,41.9h5.6l-3-50.2h-14 c0,0-14.5-8.8-40.6-7.2c-26,1.6-51.4,27-49.8,59.4c1.6,32.4,29.9,36.3,59.8,48.1s45.3,39.6,34.7,62.8c-10.6,23.3-38.8,26.7-67,10.5 c-28.2-16.2-28-55.8-28-55.8h-3.9l1.5,63l2,1.6c0,0,9.3-6.5,20.6-3.4c11.3,3.1,45.4,17.3,73.9-2.7c28.4-20,33.1-49.3,21.3-72.3 C-1642.4,234.9-1670.6,229.4-1700.3,215.1z}; + %%\path[letter] svg {M-1320.8,257.9c-11.8-23-40-28.6-69.8-42.8c-29.8-14.2-26.3-44.5-11.8-57.4c14.5-12.9,40.9-8.4,52.2,4.6 c11.4,13,16.3,41.9,16.3,41.9h5.6l-3-50.2h-14c0,0-14.5-8.8-40.6-7.2c-26,1.6-51.4,27-49.8,59.4c1.6,32.4,29.9,36.3,59.8,48.1 s45.3,39.6,34.7,62.8c-10.6,23.3-38.8,26.7-67,10.5c-28.2-16.2-28-55.8-28-55.8h-3.9l1.5,63l2,1.6c0,0,9.3-6.5,20.6-3.4 c11.3,3.1,45.4,17.3,73.9-2.7C-1313.6,310.2-1309,280.9-1320.8,257.9z}; + + % Auguste Boulanger (1897) + \path[letter] svg {M-849,152.9 -826.4,152.9 -826.4,234.7 -877.4,234.7 -877.4,152.9 -853.4,152.9 -853.4,147.1 -935.3,147.1 -935.3,152.9 -912.7,152.9 -912.7,325.7 -937.8,325.7 -937.8,333.1 -877.4,333.1 -857,333.1 -857,325.7 -877.4,325.7 -877.4,240.1 -826.4,240.1 -826.4,325.7 -851.5,325.7 -851.5,333.1 -791.2,333.1 -770.8,333.1 -770.8,325.7 -791.2,325.7 -791.2,152.9 -767.2,152.9 -767.2,147.1 -849,147.1z}; + \path[letter] svg {M-514.6,216c-32.4-14.1-36.8-39-27.4-54.1c9.4-15.2,27.7-16.8,43.9,0c16.3,16.8,19.4,45.1,19.4,45.1h5.5l3.5-60.4h-5.6 c-1.3,4.4-7.2,12-18.3,6c-11.1-6-38.3-15.2-58.7,2.3c-20.4,17.5-21.1,42.2-12.9,62.3c8.2,20.2,34,30.8,59.9,43.9 c25.9,13.1,31.4,40.5,18.7,57.2c-12.7,16.7-31.7,17.5-50,2.5c-18.3-15-27.8-51.8-27.8-52.8s-4.3-2-4.8,0.4 c-0.5,2.4-1.1,63.1-1.1,63.1l2.8,1.6c0,0,4.3-5.2,7.2-7.9c3-2.7,11.4-1,20.5,3.4c9.1,4.4,34.4,15.8,61.3-1.8 c26.9-17.6,29.2-43.4,22-64.1C-463.5,242.2-482.3,230.1-514.6,216z}; + \path[letter] svg {M-655.2,326.7c-3.2,0-5.8,0-7.9,0c-6.1,0-8.5,0-9.3,0c0,0-0.2,0-0.7,0c-2.4,0-6.3,0.7-8.6-1c-1.1-0.8-1.8-2.1-1.8-4.3 
v-80.9c1.3,0,2.8,0,4.3,0c28.1,0,31.3,44.6,31.3,44.6h5.4v-93.4h-5c0,25.8-14.5,44.4-30.6,44.4c-2.2,0-4,0-5.4,0v-78v-5.1h3 c0.9-0.3,2-0.4,3.4-0.4c6.6,0,11.8,0,11.8,0c2,0,3.9,0.1,5.8,0.3c41.1,4.1,42.8,51.2,42.8,51.2h6.6v-56.8h-45.4 c-0.5,0.1-1,0.1-1.6,0h-2.4h-81.8v5.8h22.6v172.8h-25.1v7.4h60.3h20.4h56.5v-58.5h-4.8C-615.3,298.6-629.2,326.7-655.2,326.7z}; + \path[letter] svg {M-653.1,143.8c-2.5-1.2-4.6-3.4-6.5-6c-6.2-8.5-9.3-21.7-9.3-21.7s-8.7,5.1-28.6,0.9c8.3,17.2,29.8,27.2,37.9,29.5 c1,0.3,1.8,0.5,2.4,0.5h1.6C-652.4,146.7-653.1,143.8-653.1,143.8z}; + \path[letter] svg {M-336.6,326.7c-3.2,0-5.8,0-7.9,0c-6.1,0-8.5,0-9.3,0c0,0-0.2,0-0.7,0c-2.4,0-6.3,0.7-8.6-1c-1.1-0.8-1.8-2.1-1.8-4.3 v-80.9c1.3,0,2.8,0,4.3,0c28.1,0,31.3,44.6,31.3,44.6h5.4v-93.4h-5c0,25.8-14.5,44.4-30.6,44.4c-2.2,0-4,0-5.4,0v-78v-5.1h3 c0.9-0.3,2-0.4,3.4-0.4c6.6,0,11.8,0,11.8,0c2,0,3.9,0.1,5.8,0.3c41.1,4.1,42.8,51.2,42.8,51.2h6.6v-56.8h-45.4 c-0.5,0.1-1,0.1-1.6,0h-2.4h-81.8v5.8h22.6v172.8h-25.1v7.4h60.3h20.4h56.5v-58.5h-4.8C-296.6,298.6-310.5,326.7-336.6,326.7z}; + \path[letter] svg {M-1090.7,207.5h4.8c9.9-47.7,33.3-55.8,37.8-55.8s5.6,0,5.6,5.6c0,5.2,0,150.6,0,169.7h-25.6v6.2h85.1v-6.2h-24.2 c0,0,0-165.3,0-169.7c0-4.3,0.4-5.8,7.2-5.8c0.9,0,1.5,0,2.1,0c27.3,0,34.1,57.9,34.1,57.9h3.7v-62.2h-130.6V207.5z}; + %\path[letter] svg {M-137.2,262.8c-7.2-20.7-25.9-32.8-58.3-46.8c-32.4-14.1-36.8-39-27.4-54.1c9.4-15.2,27.7-16.8,44,0 c16.3,16.8,19.4,45.1,19.4,45.1h5.5l3.5-60.4h-5.6c-1.3,4.4-7.2,12-18.3,6c-11-6-38.3-15.2-58.7,2.3 c-20.4,17.5-21.1,42.2-12.9,62.3c8.2,20.2,34,30.8,59.9,43.9c25.9,13.1,31.4,40.5,18.7,57.2c-12.7,16.7-31.7,17.5-50,2.5 c-18.3-15-27.8-51.8-27.8-52.8s-4.3-2-4.8,0.4c-0.5,2.4-1.1,63.1-1.1,63.1l2.8,1.6c0,0,4.3-5.2,7.2-7.9c3-2.7,11.4-1,20.5,3.4 c9.1,4.4,34.4,15.8,61.3-1.8C-132.4,309.3-130,283.5-137.2,262.8z}; + + % Henri Cartan (1928) + %\path[letter] svg {M157.6,147.7h-2.8H27.4v48.7c0,0.7,0,1.4,0,2.1l5.6-0.2v-1.9c0.5-8.3,6.7-20.6,15.3-29.6c8.3-8.7,17.6-13.5,26.4-13.5h3.2 v169.2H60.7h-6.5v5.6h6.5h63.6h6.5v-5.6h-6.5h-17.2V153.3h3.2c8.8,0,18.2,4.8,26.4,13.5c8.5,9,14.8,21.3,15.3,29.6v1.9l5.6,0.2 c0-0.7,0-1.4,0-2.1V147.7z}; + %\path[letter] svg {M288.6,153.3 314.5,153.3 314.5,233.6 243.8,233.6 243.8,153.3 269,153.3 269,147.7 192.9,147.7 192.9,153.3 216.7,153.3 216.7,322.4 192.9,322.4 192.9,328 269,328 269,322.4 243.8,322.4 243.8,239.2 314.5,239.2 314.5,322.4 288.6,322.4 288.6,328 364.7,328 364.7,322.4 341.6,322.4 341.6,153.3 364.7,153.3 364.7,147.7 288.6,147.7z}; + %\path[letter] svg {M632,213.2c-29.3-16.4-33.4-40.1-21.4-52.5c12-12.4,33.5-15.5,50.3-1.3c16.8,14.2,26.1,40.7,26.1,40.7h4.6v-52.3H687 c0,0,0.9,7.4-10.1,8s-14.3-9.3-39.9-10.5c-25.7-1.3-49.7,16.5-48.3,48.3c1.3,30.8,24.2,42.3,55.2,56c30.9,13.7,44.9,27.4,44.8,48 c-0.1,20.7-30.8,40.3-57.5,24.1c-26.7-16.3-35.3-53.2-35.3-53.2h-4.4l-1.6,59.2h3.4c8.8-13.6,19.7-7.7,19.7-7.7 c31.6,18.8,65,13,81.1-7.7c16.1-20.7,18.2-46.6,2-63.9C679.7,231.3,661.3,229.6,632,213.2z}; + %\path[letter] svg {M531.9,301.6c-6.3,9.5-17.3,20.9-34.5,20.9h-46.9v-83.2h19.1h1.6v0.2c19.5,0,23.6,22.2,23.6,40.8h5.6V193h-5.6 c0,15.2-3.3,40.6-25.2,40.6h-19.1v-80.4h36.4c35.2,0,50.9,32.8,50.9,45.3h5.6v-50.9H401.8v5.6h21.6v169.2h-21.6v5.6h146.3v-54h-5.6 C542.5,278,539.4,290.2,531.9,301.6z}; + %\path[letter] svg {M874.9,301.6c-6.3,9.5-17.3,20.9-34.5,20.9h-46.9v-83.2h19.1h1.6v0.2c19.5,0,23.6,22.2,23.6,40.8h5.6V193h-5.6 c0,15.2-3.3,40.6-25.2,40.6h-19.1v-80.4h36.4c35.2,0,50.9,32.8,50.9,45.3h5.6v-50.9H744.8v5.6h21.6v169.2h-21.6v5.6h146.3v-54h-5.6 C885.5,278,882.4,290.2,874.9,301.6z}; + %\path[letter] svg 
{M492.5,145.9v-3.7c-17.8-2.1-19.5-20.1-19.5-20.1s-6,1.3-21.8,0C451.8,145.5,492.5,145.9,492.5,145.9z}; + %%\path[letter] svg {M1036,248.5c-16.3-17.2-34.7-19-64-35.3c-29.3-16.4-33.4-40.1-21.4-52.5c12-12.4,33.5-15.5,50.3-1.3 c16.8,14.2,26.1,40.7,26.1,40.7h4.6v-52.3h-4.6c0,0,0.9,7.4-10.1,8c-11,0.6-14.3-9.3-39.9-10.5c-25.7-1.3-49.7,16.5-48.3,48.3 c1.3,30.8,24.2,42.3,55.2,56c30.9,13.7,44.9,27.4,44.8,48c-0.1,20.7-30.8,40.3-57.5,24.1c-26.7-16.3-35.3-53.2-35.3-53.2h-4.4 l-1.6,59.2h3.4c8.8-13.6,19.7-7.7,19.7-7.7c31.6,18.8,65,13,81.1-7.7C1050.1,291.7,1052.2,265.8,1036,248.5z}; +\end{tikzpicture} diff --git a/frontmatter/title.tex b/frontmatter/title.tex @@ -0,0 +1,61 @@ +\newgeometry{margin=2cm} +\begin{titlepage} +\fontsize{12pt}{14.5pt}\selectfont +\begin{otherlanguage}{french} +\begin{center} +\hfil{Sorbonne Université}\hfil---\hfil{\textsc{lip6}~\&~\textsc{isir}}\hfil---\hfil{\textsc{edite} de Paris}\hfil\null +\vfill +\input{frontmatter/thèse.tex} +\vfill +{défendue par\par} +\medskip +\makeatletter +{\fontsize{14pt}{18pt}\selectfont\hypersetup{hidelinks}\href{mailto:esimon@esimon.eu}{\@author}\par} +\makeatother +\vfill +en vue de l'obtention du grade de Docteur\par +\vfill +\hrule +\vspace{1cm} +\makeatletter +{\garamond\fontsize{24pt}{30pt}\selectfont\scshape\foreignlanguage{english}{\@title}\par} +\makeatother +\vspace{1cm} +\hrule +\vfill +soutenue publiquement le 30 juin 2022\par +\vfill +Devant le jury composé de\par +\bigskip +\begin{tabular*}{\textwidth}{@{}l@{ }l@{\extracolsep{\fill}}r@{}} + \textbf{Pr} & + \textbf{Alexandre Allauzen} & + Rapporteur \\ + \multicolumn{3}{@{}l}{Professeur des universités, Université Paris-Dauphine \textsc{psl}, \textsc{espci}} \\ + \textbf{Dr} & + \textbf{Benoit Favre} & + Rapporteur \\ + \multicolumn{3}{@{}l}{Maître de conférences, Aix-Marseille Université} \\ + \textbf{Pr} & + \textbf{Pascale Sébillot} & + Examinatrice \\ + \multicolumn{3}{@{}l}{Professeure des universités, \textsc{irisa}, \textsc{insa} Rennes} \\ + \textbf{Pr} & + \textbf{Xavier Tannier} & + Examinateur \\ + \multicolumn{3}{@{}l}{Professeur des universités, Sorbonne Université} \\ + \textbf{Dr} & + \textbf{Benjamin Piwowarski} & + Co-encadrant \\ + \multicolumn{3}{@{}l}{Chargé de recherche, \textsc{cnrs}, Sorbonne Université} \\ + \textbf{Dr} & + \textbf{Vincent Guigue} & + Directeur \\ + \multicolumn{3}{@{}l}{Maître de conférences, Sorbonne Université} \\ +\end{tabular*} +\end{center} +\end{otherlanguage} +\end{titlepage} +\restoregeometry +\clearpage +\thispagestyle{empty} diff --git a/latexmkrc b/latexmkrc @@ -0,0 +1,26 @@ +# Main file +@default_files = ("thesis.tex", "french summary.tex"); + +# Use lualatex +$pdf_mode = 4; + +# Where to write output and temporary files +$out_dir = "build"; + +# Allow the execution of arbitrary shell command +set_tex_cmds("--shell-escape %O %S"); + +# Create build directories if needed +unless(-d "$out_dir"){ + mkdir("$out_dir"); + mkdir("$out_dir/frontmatter"); + mkdir("$out_dir/mainmatter"); + mkdir("$out_dir/mainmatter/context"); + mkdir("$out_dir/mainmatter/relation extraction"); + mkdir("$out_dir/mainmatter/fitb"); + mkdir("$out_dir/mainmatter/graph"); + mkdir("$out_dir/backmatter"); + mkdir("$out_dir/backmatter/french"); + mkdir("$out_dir/backmatter/assumptions"); + mkdir("$out_dir/backmatter/datasets"); +} diff --git a/lib/distribution output.def b/lib/distribution output.def @@ -0,0 +1,14 @@ +\tikzset{ + activation/.style={fill=Dark2-C}, + mean activation/.style={fill=Dark2-B}, +} + +\NewDocumentCommand\drawDistribution{m m m m}{ + \foreach 
\i/\value in {#4}{ + \pgfmathsetmacro\east{1.65+\i/10*3.25} + \pgfmathsetmacro\west{\east+3.25/10-0.02} + \pgfmathsetmacro\north{#3+\value/2+0.02} + \path[#1] (\east, #3) rectangle (\west, \north); + } + \node[anchor=east] (r1) at ($(1.5, 0.165) + (0, #3)$) {#2}; +} diff --git a/lib/draft version.lua b/lib/draft version.lua @@ -0,0 +1,39 @@ +require("io") +require("os") + +local draft_version = {} + +draft_version.cache = nil + +local function git_commit() + local handle = assert(io.popen("git rev-parse HEAD", "r")) + local output = handle:read("*all") + local result = {handle:close()} + assert(result[3] == 0) + return output:sub(1, 8) +end + +local function git_uncommited_changes() + local handle = assert(io.popen("git status --porcelain", "r")) + local output = handle:read("*all") + local result = {handle:close()} + assert(result[3] == 0) + return output ~= "" +end + +local function date() + return os.date("%Y-%m-%d %H:%M:%S", os.time()) +end + +function draft_version.draft_version() + if draft_version.cache == nil then + local extra = "" + if git_uncommited_changes() then + extra = "+" + end + draft_version.cache = "compiled " .. date() .. " commit " .. git_commit() .. extra + end + tex.print(draft_version.cache) +end + +return draft_version diff --git a/lib/layout.lua b/lib/layout.lua @@ -0,0 +1,72 @@ +-- This script set up the two geometry withmarginpar and withoutmarginpar. +-- The page layout somewhat respect the Van de Graaf canon except that the textblock width is not a simple fraction of the page width. +-- However the textblock does have the same proportion as the page, with the margin respecting the 1:2 ratio (with twoside) +-- Since a large marginpar is used, it is included in the textblock width, its width being a parameter. +require("math") + +local layout = {} + +local function format(value) + return tostring(math.floor(value+0.5)) .. "sp" +end + +local function set_key(config, key, value) + if value ~= nil then + value = "=" .. format(value) + else + value = "" + end + table.insert(config, key .. 
value) +end + +function layout.set(parameters) + -- Parameters: + -- twoside: whether to distinguish outer and inner margins + -- top: size of the top margin from which other margins are computed + -- mpwidth: width of the marginpar column + -- mpsep: space between the marginpar column and normal text + -- debug: whether to display the construction frame + local top = tex.sp(parameters.top) + + local common_config = {} + set_key(common_config, "a4paper") + if parameters.debug then + set_key(common_config, "showframe") + end + set_key(common_config, "top", top) + set_key(common_config, "bottom", top * 2) + if parameters.twoside then + set_key(common_config, "inner", top / math.sqrt(2)) + set_key(common_config, "outer", top * math.sqrt(2)) + else + set_key(common_config, "inner", top * 3 / 2 / math.sqrt(2)) + set_key(common_config, "outer", top * 3 / 2 / math.sqrt(2)) + end + set_key(common_config, "includehead") + + local without_margin_config = {unpack(common_config)} + set_key(without_margin_config, "nomarginpar") + -- Set a tiny marginpar to avoid spurious overfull \hbox + set_key(without_margin_config, "marginpar", tex.sp("0.4pt")) + if parameters.twoside then + set_key(without_margin_config, "twoside") + end + + local with_margin_config = {unpack(common_config)} + set_key(with_margin_config, "includemp") + set_key(with_margin_config, "asymmetric") + set_key(with_margin_config, "marginpar", tex.sp(parameters.mpwidth)) + set_key(with_margin_config, "marginparsep", tex.sp(parameters.mpsep)) + + tex.print([[\RequirePackage[]] .. table.concat(with_margin_config, ",") .. [[]{geometry}]]) + if parameters.twoside then + tex.print([[\setlength\evensidemargin{]] .. format(top * math.sqrt(2) - tex.sp("1in")) .. [[}]]) + tex.print([[\setlength\oddsidemargin{]] .. format(top / math.sqrt(2) - tex.sp("1in")) .. [[}]]) + end + tex.print([[\savegeometry{withmarginpar}]]) + + tex.print([[\geometry{]] .. table.concat(without_margin_config, ",") .. [[}]]) + tex.print([[\savegeometry{withoutmarginpar}]]) +end + +return layout diff --git a/lib/memory network.def b/lib/memory network.def @@ -0,0 +1,23 @@ +% Draw a condensed memory network +% Arguments: +% [#1] opacity (default: 1) +% {#2} position +% {#3} node name suffix +% [#4] exponent (default: none) +\NewDocumentCommand{\memorynetwork}{O{1} m m o}{ + \begin{scope}[shift={#2},opacity=#1] + \draw[dashdotted,color=Dark2-B,thick,fill=white] (-0.125, 0.875) rectangle (1.5, -0.625); + \node (q#3) at (1.25, -0.375) {\tiny \(\vctr{q}\IfNoValueF{#4}{^{(#4)}}\)}; + \node (o#3) at (1.25, 0.625) {\tiny \(\vctr{o}\IfNoValueF{#4}{^{(#4)}}\)}; + \node (m#3) at (0.06, 0.125) {}; + \draw (0, 0.1) rectangle (0.75, 0); + \draw (0, 0.25) rectangle (0.75, 0.15); + \draw[arrow] (q#3) .. controls ++(180:0.25) and ++(270:0.425) .. (0.375, 0); + \draw[arrow] (0.375,0.25) .. controls ++(90:0.425) and ++(180:0.35) .. (o#3); + \draw[arrow,rounded corners=2pt] (1.25,-0.8) -- (1.6,-0.8) -- (1.6,0.975) -- (1.25,0.975); + \end{scope} +} + +\tikzset{ + memorybrace/.style={decorate,thick,color=Dark2-C,decoration={brace,amplitude=5}} +} diff --git a/lib/moved marginpar.lua b/lib/moved marginpar.lua @@ -0,0 +1,19 @@ +local moved_marginpar = {} + +moved_marginpar.list = {} + +function moved_marginpar.display() + io.stdout:write("Marginpar moved on pages:") + for _, page in ipairs(moved_marginpar.list) do + io.stdout:write(" " .. 
page) + end + print() +end + +function moved_marginpar.declare(location) + table.insert(moved_marginpar.list, location) +end + +luatexbase.add_to_callback("wrapup_run", moved_marginpar.display, "Display all moved marginpar.") + +return moved_marginpar diff --git a/lib/plate diagram.def b/lib/plate diagram.def @@ -0,0 +1,7 @@ +\tikzset{ + pdiag var/.style={draw,circle,minimum width=5.5mm,inner sep=0.75mm}, + pdiag latent/.style={pdiag var,fill=white}, + pdiag observed/.style={pdiag var,fill=gray!25}, + pdiag plate/.style={draw,rectangle,rounded corners}, + pdiag factor/.style={draw,rectangle,fill=black,minimum width=1.5mm}, +} diff --git a/lib/render.lua b/lib/render.lua @@ -0,0 +1,206 @@ +require("io") +require("math") + +local xmlhandler = require("luaxml-mod-handler") +local xmlparser = require("luaxml-mod-xml") + +local CONFUSION_Y_SPREAD = 0.35 + +local render = {} + +local function load_xml(path) + local treehandler = xmlhandler.simpleTreeHandler() + local xml = xmlparser.xmlParser(treehandler) + + local file = io.open(path, "r") + xml:parse(file:read("*a")) + file:close() + + return treehandler.root +end + +local function render_number(x) + local s = tostring(x) + local r = "" + local c = 0 + for i=#s,1,-1 do + if (#s-i) % 3 == 0 and i~=#s then + r = "\\," .. r + end + r = s:sub(i, i) .. r + end + return r +end + +local function embeddings_bounds(list, field) + local values = {} + for _, item in ipairs(list) do + table.insert(values, tonumber(item[field])) + end + local low = math.floor(math.min(unpack(values))*2)/2 + local high = math.ceil(math.max(unpack(values))*2)/2 + return low, high +end + +function render.embeddings(path) + local xml = load_xml(path) + local xmin, xmax = embeddings_bounds(xml.embeddings.embedding, "x") + local ymin, ymax = embeddings_bounds(xml.embeddings.embedding, "y") + tex.print([[\begin{tikzpicture}]]) + tex.print([[\begin{axis}[modern, width=50mm,]]) + tex.print([[ xmin=]] .. tostring(xmin) .. [[,]]) + tex.print([[ xmax=]] .. tostring(xmax) .. [[,]]) + tex.print([[ ymin=]] .. tostring(ymin) .. [[,]]) + tex.print([[ ymax=]] .. tostring(ymax) .. [[,]]) + tex.print([[ xtick={]] .. tostring(xmin) .. [[,]] .. tostring(xmin+0.5) .. [[,...,]] .. tostring(xmax) .. [[},]]) + tex.print([[ ytick={]] .. tostring(ymin) .. [[,]] .. tostring(ymin+0.5) .. [[,...,]] .. tostring(ymax) .. [[},]]) + tex.print([[ x tick label style={rotate=90,anchor=east},]]) + tex.print([[ ] ]]) + tex.print([[\addplot+ [black, only marks, mark=*, mark options={fill=black}, nodes near coords, point meta=explicit symbolic,]]) + tex.print([[ coordinate style/.condition={x<0}{anchor=west},]]) + tex.print([[ coordinate style/.condition={x>0}{anchor=east},]]) + tex.print([[ ] coordinates {]]) + for _, embedding in ipairs(xml.embeddings.embedding) do + tex.print(("(%f, %f) [%s]"):format(embedding.x, embedding.y, embedding.label)) + end + tex.print([[};]]) + tex.print([[\end{axis}]]) + tex.print([[\end{tikzpicture}%]]) + tex.print([[\def\explainedvarx{]] .. ([[%2.1f\%%]]):format(100*xml.embeddings.explained.x) .. [[}%]]) + tex.print([[\def\explainedvary{]] .. ([[%2.1f\%%]]):format(100*xml.embeddings.explained.y) .. 
[[}%]]) +end + +function render_confusion(path, xorig, label) + local xml = load_xml(path) + for j=1,10 do + local xpos = xorig+j*0.27 + tex.print([[\node at (]]..xpos..[[, 0) {\scriptsize ]]..(j-1)..[[};]]) + end + for i, gold in ipairs(xml.confusion.gold) do + local ypos = i*-CONFUSION_Y_SPREAD + for j, cell in ipairs(gold.clusters.recall) do + local xpos = xorig+j*0.27 + local radius = math.sqrt(cell) * 0.15 + local content = string.format("%.0f", 100*cell) + tex.print([[\fill (]]..xpos..[[, ]]..ypos..[[) circle (]]..radius..[[);]]) + end + end + local ypos = -CONFUSION_Y_SPREAD*#xml.confusion.gold - 0.2 + local bwest = xorig + 0.5*0.27 + local beast = xorig + 10.5*0.27 + tex.print([[\draw[decorate, decoration={brace, amplitude=5}] (]]..beast..[[, ]]..ypos..[[) -- (]]..bwest..[[, ]]..ypos..[[) node[below, midway, yshift=-1mm] {]]..label..[[};]]) +end + +function render_confusion_legend(path) + local xml = load_xml(path) + for i, gold in ipairs(xml.confusion.gold) do + local frequency = string.format("%.2f", 100*gold.relation.frequency) + if tonumber(gold.relation.frequency) < 0.1 then + frequency = [[\hphantom{0}]] .. frequency + end + local label = nil + if gold.relation.reversed then + label = [[\(e_2\) ]] .. gold.relation.surfaceform .. [[ \(e_1\)]] + else + label = [[\(e_1\) ]] .. gold.relation.surfaceform .. [[ \(e_2\)]] + end + local ypos = i*-CONFUSION_Y_SPREAD + tex.print([[\node[anchor=west] at (0, ]]..ypos..[[) {\scriptsize{}]]..frequency..[[\% ]]..label..[[ (\wdrel{]]..gold.relation.identifier..[[})};]]) + end +end + +function render.confusions(path1, label1, path2, label2, path3, label3, path4, label4) + tex.print([[\begin{tikzpicture}]]) + render_confusion(path1, -12, label1) + render_confusion(path2, -9, label2) + render_confusion(path3, -6, label3) + render_confusion(path4, -3, label4) + render_confusion_legend(path1) + tex.print([[\end{tikzpicture}]]) +end + +local function degrees_table(dict) + local table = {} + for _, value in ipairs(dict) do + table[tonumber(value.degree)] = tonumber(value.frequency) + end + return table +end + +local function degrees_bound(degrees, first, second) + local upper_bound = math.min(#degrees.indegrees.value, #degrees.outdegrees.value) + local max_frequency = 0 + local min_frequency = 1 + for degree = 1, upper_bound do + if first[degree] == nil or second[degree] == nil or first[degree]<1e-5 or second[degree]<1e-5 then + return degree - 1, min_frequency, max_frequency + end + max_frequency = math.max(max_frequency, first[degree], second[degree]) + min_frequency = math.min(min_frequency, first[degree], second[degree]) + end + return upper_bound, min_frequency, max_frequency +end + +local function degrees_true_max(inds, outds) + local maximum = 0 + local mtype = "in" + for degree, count in pairs(inds) do + maximum = math.max(maximum, degree) + end + local inter_max = maximum + for degree, count in pairs(outds) do + maximum = math.max(maximum, degree) + end + if maximum > inter_max then + mtype= "out" + end + return maximum, mtype +end + +function render.degrees(path) + local xml = load_xml(path) + local indegrees = degrees_table(xml.degrees.indegrees.value) + local outdegrees = degrees_table(xml.degrees.outdegrees.value) + local max_degree, min_frequency, max_frequency = degrees_bound(xml.degrees, indegrees, outdegrees) + local min_frequency = math.pow(10, math.floor(math.log(min_frequency, 10))) + local max_frequency = math.pow(10, math.ceil(math.log(max_frequency, 10))) + local right_degree = math.pow(10, math.ceil(math.log(max_degree, 
10))) + + tex.print([[\begin{tikzpicture}]]) + tex.print([[\begin{loglogaxis}[modern, width=45mm,]]) + tex.print([[ legend entries={in-degree, out-degree},]]) + tex.print([[ legend columns=2,]]) + tex.print([[ legend style={at={(1,1.05)}, anchor=south east, draw=none},]]) + tex.print([[ xlabel={degree},]]) + tex.print([[ ylabel={frequency},]]) + tex.print([[ xmin=1,]]) + tex.print([[ xmax=]] .. tostring(right_degree) .. [[,]]) + tex.print([[ ymin=]] .. tostring(min_frequency) .. [[,]]) + tex.print([[ ymax=]] .. tostring(max_frequency) .. [[,]]) + tex.print([[ ytick={1e-1, 1e-2, 1e-3, 1e-4, 1e-5},]]) + tex.print([[ ] ]]) + tex.print([[\addplot+ [Dark2-A,]]) + tex.print([[ mark=none,]]) + tex.print([[ ] coordinates {]]) + for degree = 1, max_degree do + tex.print(("(%f, %f)"):format(degree, indegrees[degree])) + end + tex.print([[};]]) + tex.print([[\addplot+ [Dark2-B,]]) + tex.print([[ mark=none,]]) + tex.print([[ ] coordinates {]]) + for degree = 1, max_degree do + tex.print(("(%f, %f)"):format(degree, outdegrees[degree])) + end + tex.print([[};]]) + tex.print([[\end{loglogaxis}]]) + tex.print([[\end{tikzpicture}%]]) + tex.print([[\def\numberarcs{]] .. render_number(xml.degrees.m) .. [[}%]]) + tex.print([[\def\maxdisplayeddegree{]] .. render_number(max_degree) .. [[}%]]) + + local true_max_degree, true_max_type = degrees_true_max(indegrees, outdegrees) + tex.print([[\def\maxdegree{]] .. render_number(true_max_degree) .. [[}%]]) + tex.print([[\def\maxdegreetype{]] .. true_max_type .. [[}%]]) +end + +return render diff --git a/lib/terminal color.lua b/lib/terminal color.lua @@ -0,0 +1,66 @@ +local tcolor = {} + +function tcolor.warning(msg) + io.stdout:write("\27[38:5:208;1m" .. tostring(msg) .. "\27[0m") +end + +function start_page_number() + io.stdout:write("\27[33;1m[") + local j = 9 + while tex.count[j] == 0 and j > 0 do + j = j - 1 + end + for k=0, j do + io.stdout:write(tex.count[k]) + if k < j then + io.stdout:write(".") + end + end + io.stdout:write("\27[0m") +end +function stop_page_number() + io.stdout:write("\27[33;1m]\27[0m") +end + +local stack = {} + +function is_local_file(filename) + if filename:sub(1, 2) == "./" then + return true + end + if filename:sub(1, 3) == "\"./" then + return true + end + return false +end + +local filetypes_left = { [0]="?", "(", "{", "<", "<", "<<" } +local filetypes_right = { [0]="?", ")", "}", ">", ">", ">>" } + +function start_file(category, filename) + if category == 1 and is_local_file(filename) then + io.stdout:write("\27[90;1m") + table.insert(stack, true) + else + io.stdout:write("\27[90m") + table.insert(stack, false) + end + io.stdout:write(filetypes_left[category]..filename.."\27[0m") +end + +function stop_file(category) + local t = table.remove(stack) + if t then + io.stdout:write("\27[90;1m") + elseif t == false then + io.stdout:write("\27[90m") + end + io.stdout:write(filetypes_right[category] .. 
"\27[0m") +end + +luatexbase.add_to_callback("start_page_number", start_page_number, "Start yellow page number.") +luatexbase.add_to_callback("stop_page_number", stop_page_number, "Stop yellow page number.") +luatexbase.add_to_callback("start_file", start_file, "Start grey file.") +luatexbase.add_to_callback("stop_file", stop_file, "Stop grep file.") + +return tcolor diff --git a/mainmatter/context/attention.tex b/mainmatter/context/attention.tex @@ -0,0 +1,78 @@ +\begin{tikzpicture}[ + text shadow/.code args={[#1]#2at#3(#4)#5}{ + \pgfkeysalso{/tikz/.cd,#1}% + \foreach \angle in {0,5,...,359}{ + \node[#1,text=white] at ([shift={(\angle:.5pt)}] #4){#5}; + } + } + ] + \node (q) at (2.5, -1.5) {\(\vctr{q}\)}; + \node (o) at (2.5, 1.75) {\(\vctr{o}\)}; + \node (mk) at (0.12, -0.25) {}; + \node (mv) at (0.12, 0.55) {}; + \draw (0, -0.5) rectangle (2, 0); + \draw (0, 0.3) rectangle (2, 0.8); + \draw[dashdotted,Dark2-B,thick] (-0.25, 2.20) rectangle (3.2, -2.05); + \node (qh) at (2.5, -2.5) {query}; + \node (hi) at (2.5, 2.75) {output}; + + \draw[arrow] (qh) -- (q); + \draw[arrow] (o) -- (hi); + + \draw[decorate,decoration={brace,amplitude=5},xshift=0,yshift=0.5] (0,0.8) -- (2,0.8) node[black,midway] (memv) {}; + \node (ws) at (1, 1.15) {\scriptsize weighted sum}; + \draw[arrow] (ws) .. controls ++(90:0.75) and ++(180:0.5) .. (o); + \draw[decorate,decoration={brace,amplitude=5,mirror},xshift=0,yshift=-0.5] (0,-0.5) -- (2,-0.5) node[black,midway] (memv) {}; + \node (ip) at (1, -0.8) {\scriptsize inner product}; + \draw[arrow] (q) .. controls ++(180:0.5) and ++(270:0.75) .. (ip); + + \node (s0) at (-6, -1) {\(\vctr{h}_0\)}; + \node (s1) at (-4.5, -1) {\(\vctr{h}_1\)}; + \node (sd) at (-3, -1) {\(\ldots\)}; + \node (sn) at (-1.5, -1) {\(\vctr{h}_\ell\)}; + + \draw[arrow] (s0) -- (s1); + \draw[dashed,arrow] (s1) -- (sd); + \draw[dashed,arrow] (sd) -- (sn); + + \node (x0) at (-6, -2) {\(\vctr{x}_0\)}; + \node (x1) at (-4.5, -2) {\(\vctr{x}_1\)}; + \node (xd) at (-3, -2) {\(\ldots\)}; + \node (xn) at (-1.5, -2) {\(\vctr{x}_\ell\)}; + + \draw[arrow] (x0) -- (s0); + \draw[arrow] (x1) -- (s1); + \draw[arrow] (xn) -- (sn); + + \draw[decorate,thick,color=Dark2-C,decoration={brace,amplitude=5},xshift=0,yshift=8] (-6.3,-1) -- (-1.2,-1) node[black,midway] (s) {}; + \node (henc) at (-3.75, -0.25) {memory}; + \draw (henc) .. controls ++(30:1) and ++(180:1) .. (-1.5,0.15); + \draw[arrow] (-1.5,0.15) .. controls ++(0:1) and ++(180:1) .. (mk); + \draw[arrow] (-1.5,0.15) .. controls ++(0:1) and ++(180:1) .. 
(mv); + + \draw (0, 0.1) rectangle (2, 0.2); + \node at (2.6, 0.17) {\scriptsize softmax}; + \draw[fill=red!60] (0.4, 0.1) rectangle (0.5, 0.2); + \draw[fill=red!10] (0.5, 0.1) rectangle (0.6, 0.2); + \draw[fill=red!20] (1.2, 0.1) rectangle (1.3, 0.2); + \draw[fill=red!40] (1.3, 0.1) rectangle (1.4, 0.2); + \draw[fill=red!30] (1.4, 0.1) rectangle (1.5, 0.2); + \draw[fill=red!10] (1.8, 0.1) rectangle (1.9, 0.2); + + \draw[fill=green!60] (0.4, 0.3) rectangle (0.5, 0.8); + \draw[fill=green!10] (0.5, 0.3) rectangle (0.6, 0.8); + \draw[fill=green!20] (1.2, 0.3) rectangle (1.3, 0.8); + \draw[fill=green!40] (1.3, 0.3) rectangle (1.4, 0.8); + \draw[fill=green!30] (1.4, 0.3) rectangle (1.5, 0.8); + \draw[fill=green!10] (1.8, 0.3) rectangle (1.9, 0.8); + + \draw[fill=blue!60] (0.4, -0.5) rectangle (0.5, 0); + \draw[fill=blue!10] (0.5, -0.5) rectangle (0.6, 0); + \draw[fill=blue!20] (1.2, -0.5) rectangle (1.3, 0); + \draw[fill=blue!40] (1.3, -0.5) rectangle (1.4, 0); + \draw[fill=blue!30] (1.4, -0.5) rectangle (1.5, 0); + \draw[fill=blue!10] (1.8, -0.5) rectangle (1.9, 0); + + \node[text shadow={[align=center,text width=3cm] at (1,0.55) {Value}}] at (1,0.55) {Value}; + \node[text shadow={[align=center,text width=3cm] at (1,-0.25) {Key}}] at (1,-0.25) {Key}; +\end{tikzpicture}% diff --git a/mainmatter/context/bert.tex b/mainmatter/context/bert.tex @@ -0,0 +1,43 @@ +\begin{tikzpicture} + \path[clip] (-1.15, -2.5) -- (-1.15, 4.5) -- (3.8, 4.5) -- (3.8, -2.5) -- cycle; + \foreach \I/\n/\x/\focus in {1/1/-0.85/\transparencyLow, m2/./0/\transparencyLow, m1/t-1/0.85/\transparencyLow, p1/./2.55/\transparencyLow, m/m/3.40/\transparencyLow, 0/t/1.7/1}{ + \if.\n + \node (w\I) at (\x, 4) {\(\cdots\)\vphantom{\(\hat{w}_{\n}\)}}; + \node (x\I) at (\x, -2) {\(\cdots\)\vphantom{\(\vctr{\tilde{x}}_{\n}\)}}; + \else + \node (w\I) at (\x, 4) {\(\hat{w}_{\n}\)}; + \node (x\I) at (\x, -2) {\(\vctr{\tilde{x}}_{\n}\)}; + \fi + + \pgfmathsetmacro\xmn{\x-1.25} + \memorynetwork[\focus]{(\xmn,-0.3)}{\I}; + + \begin{scope}[opacity=\focus] + \node[draw, fill=white, above=6mm of o\I] (lnf\I) {Layer Norm}; + \node[draw, fill=white, above=4mm of lnf\I] (lin\I) {Linear}; + \node[draw, fill=white, above=4mm of lin\I] (lns\I) {Layer Norm}; + + \coordinate (ressrc\I) at ($(lin\I.south) + (0, -2.5mm)$); + \coordinate (resdst\I) at ($(lin\I.north) + (0, 1mm)$); + \draw[arrow, rounded corners=2pt] (ressrc\I) -- ($(ressrc\I) + (6mm, 0mm)$) -- ($(resdst\I) + (6mm, 0mm)$) -- (resdst\I); + + \draw[arrow] (x\I) -- (q\I); + \draw[arrow] (o\I) -- (lnf\I); + \draw[arrow] (lnf\I) -- (lin\I); + \draw[arrow] (lin\I) -- (lns\I); + \draw[arrow] (lns\I) -- (w\I); + \end{scope} + } + + \draw[memorybrace,decoration={aspect=0.2}] (x1.north west) -- (xm.north east) coordinate[midway,yshift=5] (xbty); + \path ($(x1.north west)!0.2!(xm.north east)$) |- coordinate (xbt) (xbty); + \draw[arrow,rounded corners=5pt] (xbt) -- (xbt|-m0) -- (m0); + + \draw[very thick, dashed] (-1.15, -1.4) -- (3.8, -1.4); + \draw[very thick, dashed] (-1.15, 3.4) -- (3.8, 3.4); + \node[rotate=90, fill=white] at (3.7, 1.1) {\marginsize \hphantom{(repeated x times)}}; + \node[rotate=90, fill=white] (name) at (3.35, 1.1) {\large\IfLanguageName{french}{couche \textsc{bert}}{\textsc{bert} layer}}; + \node[rotate=90] at (3.7, 1.1) {\marginsize(\IfLanguageName{french}{répétée 12 fois}{repeated 12 times})}; + \draw[arrow] (name) -- (3.35, 3.4); + \draw[arrow] (name) -- (3.35, -1.4); +\end{tikzpicture}% diff --git a/mainmatter/context/bpe.tex b/mainmatter/context/bpe.tex @@ -0,0 +1,16 @@ 
+\begin{algorithmic} + \Function{bpe}{} + \FunctionInputs{} \(n\) the vocabulary size + \FunctionInputs*{} \(\vctr{t}\) the corpus + \FunctionOutput{} \(V\) the vocabulary + \State + \State \(V \gets\) all unique characters in \(\vctr{t}\) + \While{\(|V| < n\)} + \State \(c_1c_2 \gets\)~\parbox[t]{28mm}{most common bigram in \(\vctr{t}\)} + \State \(c_\text{new} \gets\)~new token not in \(V\) + \State \(\vctr{t}\gets\)~\parbox[t]{32mm}{replace all occurrences of \(c_1c_2\) in \(\vctr{t}\) by \(c_\text{new}\)} + \State \(V \gets V \cup \{c_\text{new}\}\) + \EndWhile + \State \Output \(V\) + \EndFunction +\end{algorithmic} diff --git a/mainmatter/context/chapter.tex b/mainmatter/context/chapter.tex @@ -0,0 +1,26 @@ +\chapter{Context: Distributed Representations} +\label{chap:context} +\begin{epigraph} + {Willard Van Orman Quine} + {\citetitle{quine_two_dogma}} + {\cite*{quine_two_dogma}}[Quine was skeptical that facts about the meanings of linguistic expressions existed, for a critical response to his position see \textcite{meaning_skepticism}.] + Meaning is what essence becomes when it is divorced from the object of reference and wedded to the word. +\end{epigraph} +\tatefix{3mm}{5mm}{7mm} +\begin{cjkepigraph}[\traditionalChinese]{45mm} + {\begin{epigraphcontent}[35mm] + {Wang Chong} + {``Lunheng'' Chapter~85} + {circa.~80} + In scientific discourse what matters are the solid facts of a matter, not elegance. + % I extended the translation of Christoph Harbsmeier with the original translation from Alfred Forke (which Harbsmeier references). + \end{epigraphcontent}} + [Adapted from the translation of \textcite{marginalia_sino-logica}, Chong promotes truth over elegance despite the influence of early Chinese skepticism.] + 論貴是而不務華 +\end{cjkepigraph} +\input{mainmatter/context/introduction.tex} +\input{mainmatter/context/history.tex} +\input{mainmatter/context/word.tex} +\input{mainmatter/context/sentence.tex} +\input{mainmatter/context/knowledge base.tex} +\input{mainmatter/context/conclusion.tex} diff --git a/mainmatter/context/cnn.tex b/mainmatter/context/cnn.tex @@ -0,0 +1,34 @@ +\begin{tikzpicture} + \foreach \n in {-2,-1,0,1,2}{ + \pgfmathsetmacro\xfocus{ifthenelse(\n>-2 && \n<2, 1, \transparencyDefault)} + \pgfmathsetmacro\ofocus{ifthenelse(\n==0, 1, \transparencyDefault)} + + \ifnum\n=0 \def\t{t} + \else\ifnum\n<0 \def\t{t\n} + \else \def\t{t+\n} + \fi\fi + + \node[opacity=\xfocus] (x\n) at ($(10mm*\n, 0mm)$) {\(\vctr{x}_{\t}\)}; + \node[draw,opacity=\ofocus] (conv\n) at ($(10mm*\n, 10mm)$) {Conv}; + \node[opacity=\ofocus] (h\n) at ($(10mm*\n, 18mm)$) {\(h_{\t,i}\)}; + } + + \node[draw,minimum width=48.5mm] (pool) at (0mm, 26mm) {Pooling}; + + \foreach \n in {-2,-1,0,1,2}{ + \pgfmathsetmacro\ofocus{ifthenelse(\n==0, 1, \transparencyDefault)} + \foreach \d in {-1,0,1}{ + \pgfmathsetmacro\src{int(\n+\d)} + \pgfmathsetmacro\isvalid{int(ifthenelse(\src>=-2 && \src<=2, 1, 0))} + \ifnum\isvalid>0 + \draw[arrow,opacity=\ofocus] (x\src) -- (conv\n); + \fi + } + + \draw[arrow,opacity=\ofocus] (conv\n) -- (h\n); + \draw[arrow,opacity=\ofocus] (h\n) -- (h\n |- pool.south); + } + + \node (o) at (0mm, 33mm) {\(o_i\)}; + \draw[arrow] (pool) -- (o); +\end{tikzpicture}% diff --git a/mainmatter/context/conclusion.tex b/mainmatter/context/conclusion.tex @@ -0,0 +1,32 @@ +\section{Conclusion} +\label{sec:context:conclusion} +As exposed in Section~\ref{sec:context:history}, we are in the middle of a transition away from symbolic representations towards distributed ones. 
+We inscribe this thesis within this transition.
+We deal with two kinds of symbolic representations of meaning: unstructured language and structured knowledge bases.
+In this chapter, we presented methods to extract distributed representations for both of these systems.
+In the following chapters, we will deal with the link between language and knowledge bases.
+
+Following word2vec (Section~\ref{sec:context:word2vec}), feature extraction for textual inputs is now mostly done through word embeddings.
+In order to obtain a representation of a sentence, the models on top of these word embeddings progressively evolved from \textsc{cnn} (Section~\ref{sec:context:cnn}) and \textsc{rnn} (Section~\ref{sec:context:rnn}) towards transformers and contextualized word embeddings (Section~\ref{sec:context:transformers}).
+As we will see in the \hyperref[chap:relation extraction]{next} chapter, relation extraction models followed this trend closely.
+
+We then introduced the structured knowledge representation we handle throughout this thesis, knowledge bases.
+In particular, Section~\ref{sec:context:relation algebra} gives a formal notation for handling relations, which we use to write modeling hypotheses in subsequent chapters.
+Finally, Section~\ref{sec:context:knowledge base completion} presents common models making use of distributed representations of knowledge bases for the task of knowledge base completion.
+This task is not only the usual evaluation framework for distributed knowledge base representations but is also of special interest for Chapter~\ref{chap:fitb}, where we leverage the similarity between the knowledge base completion and the relation extraction tasks.
+
+The progression of models presented in this chapter also reflects a progression in the scale of the problems considered.
+We started by exploring the representation of words, one of the smallest semantic units, then moved on to sentences, then to knowledge bases, which purpose to represent whole swaths of human knowledge.
+Another underlying thread of this chapter is the notion of relationship.
+While the idea is quite pervasive in Section~\ref{sec:context:knowledge base}, it is also present in Section~\ref{sec:context:word} through the not-so-randomly chosen example of Figure~\ref{fig:context:word2vec pca}.%
+\sidenote{
+    This figure presented the word embeddings of some countries and their capitals.
+    The relationship between the words seems to bear the same regularity as the relationship between the underlying entities.
+    This regularity is representative of the \textsl{capital of} relationship.
+}
+Even in Section~\ref{sec:context:sentence}, representations of sentences are obtained by modeling the relationship of words with each other.
+For example, in a transformer, the attention weights capture the relationship between two words: the query and one element of the memory.
+
+In the next chapter, we make the link between the two symbolic representations of meaning we studied: language and knowledge bases.
+More specifically, we present relation extraction models.
+State-of-the-art models build heavily on the distributed representation methods introduced in this chapter and are the main focus of this thesis.
diff --git a/mainmatter/context/fact.tex b/mainmatter/context/fact.tex @@ -0,0 +1,11 @@ +\begin{tikzpicture} + \node (capital) at (0, 0) {\textsl{capital of}\textsuperscript{\,\wdrel{1376}}}; + \node[left=0mm of capital] (paris) {\vphantom{capital of}Paris\textsuperscript{\,\wdent{90}}}; + \node[right=0mm of capital] (france) {\vphantom{capital of}France\textsuperscript{\,\wdent{142}}}; + + \draw[decorate,decoration={brace,amplitude=5}] ($(capital.north west) + (1mm,0)$) -- ($(capital.north east) + (-1mm,0)$) node [midway,anchor=south,yshift=1mm] {\vphantom{entity}relation}; + \draw[decorate,decoration={brace,amplitude=5}] ($(paris.north west) + (1mm,0)$) -- ($(paris.north east) + (-1mm,0)$) node [midway,anchor=south,yshift=1mm,align=center] {head\\entity}; + \draw[decorate,decoration={brace,amplitude=5}] ($(france.north west) + (1mm,0)$) -- ($(france.north east) + (-1mm,0)$) node [midway,anchor=south,yshift=1mm,align=center] {tail\\entity}; + + \draw[decorate,decoration={brace,amplitude=5}] ($(france.south east) + (-1mm,0)$) -- ($(paris.south west) + (1mm,0)$) node [midway,anchor=north,yshift=-2mm] {fact}; +\end{tikzpicture} diff --git a/mainmatter/context/history.tex b/mainmatter/context/history.tex @@ -0,0 +1,113 @@ +\section{Historical Development} +\label{sec:context:history} +In this section, we expose the rationale for applying deep learning to relation extraction, how the related fields appeared and why the task is relevant. +Since algorithms were first given to train generic deep neural networks~\parencite{deepbeeliefnets,relu}, most problems tackled by machine learning can now be approached with deep learning methods. +Over the last few years, deep learning has been very successful in a variety of tasks such as image classification~\parencite{cnn_imagenet}, machine translation~\parencite{nmt_encdec}, audio synthesis~\parencite{wavenet}, etc. +This is why it is not surprising that deep learning is now applied to more tasks traditionally tackled by other machine learning methods, such as in this thesis, where we apply it to relation extraction. + +From a historical point of view, machine learning---and hence deep learning---are deeply anchored in \emph{empiricism}. +Empiricism is the epistemological paradigm in which knowledge is anchored in sensory experiences of the world, which are called empirical evidence. +This is not to say that there are no theoretical arguments motivating the use of certain machine learning methods; the universal approximation theorems~\parencite{universal_approximator_sigmoid, universal_approximator_nonpolynomial} can be seen as a theoretical argument for deep learning. +But in the end, a machine learning method draws its legitimacy from the observation that they perform strongly on a real dataset. +This is in stark contrast to the rationalist paradigm, which posits that knowledge comes primarily from reason. + +This strong leaning on empiricism can also be seen in \textsc{nlp}. +\textsc{nlp} comes from the \emph{externalist} approach to linguistic theorizing, focusing its analyses on actual utterances. 
+A linguistic tool that externalists often avoid while being widely used by other schools is elicitation through prospective questioning: ``Is this sentence grammatical?''
+Externalists consider that language is acquired through distributional properties of words and other constituents;%
+\sidenote{In other words, language is acquired by observing empirical co-occurrences: where words go and where they don't in actual utterances tell us where they can go and where they can't.}
+and study these properties by collecting corpora of naturally occurring utterances.
+The associated school of structural linguistics inscribes itself into the broader view of \emph{structuralism}, the belief that phenomena are intelligible through a concept of structure that connects them together, the focus being on these interrelations rather than on each individual object.
+In the case of linguistics, this view was pioneered by Ferdinand de Saussure, who stated in his course in general linguistics:
+\begin{quote}
+    \begin{epigraph}{Ferdinand de Saussure}{\citetitle{linguistique_generale}}{\cite*{linguistique_generale}}
+        La langue est un système dont toutes les parties peuvent et doivent être considérées dans leur solidarité synchronique.
+    \end{epigraph}
+    Language is a system whose parts can and must all be considered in their synchronic%
+    \sidenote{
+        Saussure makes a distinction between syn\-chron\-ic---at a certain point in time---and dia\-chron\-ic---changing over time---analyses.
+        This does not mean that the meaning of a word is not influenced by its history, but that this influence is entirely captured by the relations of the word with others at the present time and that conditioned on these relations, the current meaning of the word is independent of its past meaning.
+    }
+    solidarity.\\
+    \null\hfill---
+    \begin{minipage}[t]{5cm}
+        Ferdinand de Saussure, \citetitle{linguistique_generale}~(\cite*{linguistique_generale})
+    \end{minipage}
+\end{quote}
+This train of thought gave rise to \emph{distributionalism}, whose ideas are best illustrated by the distributional hypothesis stated in \textcite{distributional_hypothesis}:
+\begin{spacedblock}
+    \strong{Distributional Hypothesis:}
+    \emph{Words that occur in similar contexts convey similar meanings.}
+\end{spacedblock}
+This can be pushed further by stating that a word is solely characterized by the context in which it appears.
+
+On the artificial intelligence side, deep learning is usually contrasted with symbolic approaches.
+The distinction originates in the way information is represented by the system.
+In the symbolic approach, information is carried by strongly structured representations in which a concept is usually associated with a single entity, such as a variable in a formula or in a probabilistic graphical model.
+On the other hand, deep learning uses distributed representations in which there is a many-to-many relationship between concepts and neurons; each concept is represented by many neurons, and each neuron represents many concepts.
+The idea that mental phenomena can be represented using this paradigm is known as \emph{connectionism}.
+One particular argument in favor of connectionism is the ability to degrade gracefully: deleting a unit in a symbolic representation equates to deleting a concept, while deleting a unit in a distributed representation merely lowers the precision with which concepts are defined.
+Note that connectionism is not necessarily incompatible with a symbolic theory of cognition.
+Distributed representations can be seen as a low-level explanation of cognition, while from this point of view, symbolic representation is a high-level interpretation encoded by distributed representations.% +\sidenote[][-3cm]{ + This view on the relation between distributed and symbolic representations can be seen in the early neural networks literature as can be seen in \textcite{concept_backprop}, which is often cited for its formalization of the backpropagation algorithm. + More recently, \textcite{binding_symbolic} investigate the binding problem between symbols and distributed representations. +} + +Furthermore, we can make a distinction on how structured is the kind of data used. +In this thesis, we will especially focus on the relationship between unstructured text% +\sidenote{ + Of course, language does have a structure. + We do not deny the existence of grammar but merely state that text is less structured than other structures studied in this chapter (see Section~\ref{sec:context:knowledge base}). +} +and structured data (in the form of knowledge bases). +To give a sense of this difference, compare the following text from the Paris Wikipedia page to facts from the Wikidata knowledge base: +\begin{spacedblock} +\null\hfill% +\begin{minipage}{0.425\textwidth} + Paris is the capital and most populous city of France. + The City of Paris is the centre and seat of government of the region and province of Île-de-France. +\end{minipage}% +\hfill% +\begin{minipage}{0.425\textwidth} + \begin{itemize}[label={},leftmargin=0mm] + \item Paris \textsl{capital of} France + \item Paris \textsl{located in the administrative territorial entity} Île-de-France + \end{itemize} +\end{minipage}% +\hfill\null% +\end{spacedblock} +\begin{marginparagraph}[-1cm] + We use \textsl{slanted text} to indicate a relational surface form such as ``\textsl{capital of}'' in the fact ``Paris \textsl{capital of} France.'' +\end{marginparagraph}% + +Through this example, we see that both natural languages and knowledge bases encode meaning. +To talk about what they encode, we assume the existence of a semantic space containing all possible meanings. +We do not assume any theory of meaning used to define this space; this allows us to stay neutral on whether language is ontologically prior to propositional attitudes and its link with reality or semantically evaluable mental states. +In the same way that different natural languages are different methods to address this semantic space, knowledge bases seek to refer to the same semantic space% +\sidenote{ + Strictly speaking, practical knowledge bases only seek to index a subset of this space, see note~\ref{note:context:knowledge vs meaning} in the margin of page \pageref{note:context:knowledge vs meaning}. +} +with an extremely rigid grammar. + +Both natural language and knowledge bases are discrete systems. +For both these systems, we can use the distributional hypothesis to obtain continuous distributed representations. +These representations purpose to capture the semantic as a simple topological space such as a Euclidean vector space where distance encodes dissimilarity, as shown in Figure~\ref{fig:context:word2vec pca}. +Moreover, using a differentiable manifold allows us to train these representations through backpropagation using neural architectures. + +The question of how to process texts algorithmically has evolved over the last fifty years. +Language being conveyed through symbolic representations, it is quite natural for us to manipulate them. 
+As such, early machine learning models strongly relied on them.
+For a long time, symbolic approaches had an empirical advantage: they worked better.
+However, in the last few years, distributed representations have shown impressive results, and most tasks are now tackled with deep learning using distributed representations.
+\begin{marginparagraph}
+    This transition from rule-based models to statistical models to neural network models can also be seen in relation extraction with Hearst (\cite*{hearst_hyponyms}, symbolic rule-based, Section~\ref{sec:relation extraction:bootstrap}), \textsc{sift} (\cite*{sift}, symbolic statistical, Section~\ref{sec:relation extraction:hand-designed features}) and \textsc{pcnn} (\cite*{pcnn}, distributed neural, Section~\ref{sec:relation extraction:pcnn}).
+\end{marginparagraph}
+As an example, this can be seen in the machine translation task.
+Early models from the 1950s onward were rule-based.
+Starting in the 1990s, statistical approaches were used, first using statistics of words, then of phrases.
+Looking at the Workshop on statistical machine translation (\textsc{wmt}): at the beginning of the last decade, no neural approaches were used and the report~\parencite{wmt2010} deplored the disappearance of rule-based systems; at the end of the decade, most systems were based on distributed representations~\parencite{wmt2020}.%
+\sidenote{To be more precise, most models use transformers, which are a kind of neural network introduced in Section~\ref{sec:context:transformers}.}
+While this transition occurred in \textsc{nlp}, knowledge representation has been a stronghold of symbolic approaches until very recently.
+The research reported in this thesis aims to develop the distributed approach to knowledge representation for the task of relation extraction.
+In the remainder of this chapter, we first report the distributed approaches to \textsc{nlp}, which have showcased state-of-the-art results over the last decade, before presenting a structured symbolic representation, knowledge bases, and some methods to obtain distributed representations from them.
diff --git a/mainmatter/context/introduction.tex
@@ -0,0 +1,33 @@
+Language conveys meaning.
+Thus, it should be possible to explicitly map a text to its semantic content.
+The research reported in this thesis seeks to algorithmically extract meaning conveyed by language using deep learning techniques from the information extraction and natural language processing (\textsc{nlp}) fields.
+We focus on the task of relation extraction, in which we seek to extract the semantic relation conveyed by a sentence.
+For example, given the sentence ``Paris is the capital of France,'' we seek to extract the relation ``\textsl{capital of}.''
+To build a formal representation of relations, we use knowledge bases.
+In their simplest form, knowledge bases encode knowledge as a set of facts, which take the form \((\text{entity}, \text{relation}, \text{entity})\) such as \((\text{Paris}, \textsl{capital of}, \text{France})\).
+Like natural languages, knowledge bases purpose to convey meaning%
+\sidenote{
+    Knowledge bases usually focus on knowledge, which can be seen as a subset of all possible meanings.
+    For example, facts like \((\text{I}, \textsl{want}, \text{ice cream})\) are not usually encoded in knowledge bases.
+    However, they theoretically could.
+ To be precise, throughout this thesis we'll be using knowledge bases in two ways: + \begin{itemize}[nosep] + \item as a basic theoretical structured representation of meaning, + \item as a practical datasets to evaluate algorithms on. + \end{itemize} + This means that algorithms tested on existing knowledge bases are only tested on a subset of possible meanings. + However, when we discuss the representation of knowledge base facts, note that this can be generalized to any meaningful facts expressible in the knowledge base framework. + \label{note:context:knowledge vs meaning} +} +but in a structure that is readily manipulable by algorithms. +However, most knowledge---like this thesis---comes in the form of text. +There lies the usefulness of the relation extraction task on which we focus. +By ``translating'' natural language into knowledge bases, we seek to make more knowledge available to algorithms. + +In this chapter, we focus on the two kinds of data we deal with in this thesis, namely text and knowledge bases. +Subsequent chapters will deal with the extraction of knowledge base facts from text. +In Section~\ref{sec:context:history}, we begin by positioning this task within the larger historical context by focusing on how the fields of machine learning, \textsc{nlp} and information extraction developed. +Before delving into the specific algorithms for relation extraction, we must first define how to process language and how to represent semantic information in a way that can be manipulated by machine learning algorithms. +In particular, we seek to obtain a \emph{distributed representation}---which we define in the next section---of both language and knowledge bases since deep learning algorithms cannot directly work with non-distributed representations. +We first inspect the representation of words in Section~\ref{sec:context:word} before exploring how to process whole sentences in Section~\ref{sec:context:sentence}. +Finally, Section~\ref{sec:context:knowledge base} focuses on knowledge bases by first giving a formal definition before studying methods for extracting distributed representations from them. diff --git a/mainmatter/context/knowledge base.tex b/mainmatter/context/knowledge base.tex @@ -0,0 +1,295 @@ +\section{Knowledge Base} +\label{sec:context:knowledge base} +Our goal is to extract structured knowledge from text. +In this section, we introduce the object we use to express this knowledge, namely the knowledge base. +A knowledge base is a symbolic semantic representation of some piece of knowledge. +It is defined by a set of concepts, named \emph{entities}, and by the relationships linking these entities together, named \emph{facts} or \emph{statements}. +Formally, a knowledge base is constructed from a set of entities \(\entitySet\), a set of relations \(\relationSet\) and a set of facts \(\kbSet\subseteq\entitySet\times\relationSet\times\entitySet\). +Note that these facts purpose to encode some kind of truth about the world. 
+To illustrate, here are some examples from Wikidata~\parencite{wikidata}: +\begin{align*} + \entitySet = \{ & \wdent{90}\text{(Paris)}, \wdent{7251}\text{(Alan Turing)}, \dotsc\} \\ + \relationSet = \{ & \wdrel{1376}\text{(\textsl{capital of})}, \wdrel{19}\text{(\textsl{place of birth})}, \dotsc\} \\ + \kbSet = \{ + & \wdent{90}~\wdrel{1376}~\wdent{142}\text{ (Paris is the capital of France)}, \\ + & \wdent{3897}~\wdrel{1376}~\wdent{916}\text{ (Luanda is the capital of Angola)}, \\ + & \wdent{7251}~\wdrel{19}~\wdent{122744}\text{ (Alan Turing was born in Maida Vale)}, \\ + & \wdent{164047}~\wdrel{19}~\wdent{23311}\text{ (Alexander Pope was born in London)}, \\ + & \dotsc\} \hfill +\end{align*} + +As indicated by the identifiers such as \wdent{7251}, knowledge bases link concepts together. +An entity is a concept that may have several textual representations---surface forms---such as ``Alan Turing'' and ``Alan Mathison Turing.'' +Here, we showed the Wikidata identifier whose purpose is to identify concepts uniquely. +For ease of reading, when there is no ambiguity between an entity and one of its surface forms, we simply write the surface form without the identifier of its associated concept. + +\begin{marginfigure} + \centering + \input{mainmatter/context/fact.tex} + \scaption[Structure of a knowledge base fact.]{ + Structure of a knowledge base fact. + \label{fig:context:fact} + } +\end{marginfigure} + +Given two entities \(e_1, e_2\in\entitySet\) and a relation \(r\in\relationSet\), we simply write \tripletHolds{e_1}{r}{e_2} as a shorthand notation for \((e_1, r, e_2)\in\kbSet\), meaning that \(r\) links \(e_1\) and \(e_2\) together. +As illustrated by Figure~\ref{fig:context:fact}, \(e_1\) is called the \emph{head entity} of the fact or \emph{subject} of the relation \(r\). +Similarly, \(e_2\) is called the \emph{tail entity} or \emph{object}, while \(r\) is called the \emph{relation}, \emph{property} or \emph{predicate}.% +\sidenote{The term \emph{predicate} can either refer to the relation \(r\), or to the couple \((r, e_2)\), thus we will avoid using this terminology.} + +Thanks to this extremely rigid structure, knowledge bases are easier to process algorithmically. +Querying some piece of information from a knowledge base is well defined and formalized. +Query languages such as \textsc{sparql} ensure that information can be retrieved deterministically. +\begin{marginparagraph} + Example of \textsc{sparql} query for all capital cities in Asia: + + \smallskip + + \input{mainmatter/context/sparql.tex} +\end{marginparagraph} +This is in contrast to natural language, where querying some knowledge from a piece of text needs to be performed using an \textsc{nlp} model, thus incurring some form of variance on the result. +With this in mind, it is not surprising that several machine learning models rely on knowledge bases to remove a source of uncertainty from their system; this can be done in a variety of tasks such as question answering \parencite{kb_qa1, kb_qa2}, document retrieval \parencite{kb_document_retrieval} and logical reasoning \parencite{ntn}. + +Commonly used general knowledge bases include Freebase~\parencite{freebase}, \textsc{db}pedia~\parencite{dbpedia} and Wikidata~\parencite{wikidata}. +There are also several domain-specific knowledge bases such as Wordnet~\parencite{wordnet} and GeneOntology~\parencite{geneontology}. +Older works focus on Freebase---which is now discontinued---while newer ones focus on Wikidata and \textsc{db}pedia. 
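To make the set-of-triplets structure above concrete, here is a minimal Python sketch of a knowledge base stored as a set of (e1, r, e2) triplets, reusing the Wikidata identifiers quoted above; the query helper is only an illustration of the kind of deterministic retrieval that languages such as SPARQL formalize, not an actual interface to Wikidata.

# A toy fact set F ⊆ E × R × E, using the Wikidata identifiers quoted above.
facts = {
    ("Q90", "P1376", "Q142"),     # Paris capital of France
    ("Q3897", "P1376", "Q916"),   # Luanda capital of Angola
    ("Q7251", "P19", "Q122744"),  # Alan Turing place of birth Maida Vale
}

def tails(head, relation):
    """All e2 such that (head, relation, e2) is a fact: retrieval is deterministic."""
    return {e2 for (e1, r, e2) in facts if e1 == head and r == relation}

print(tails("Q90", "P1376"))  # {'Q142'}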
+These knowledge bases usually include more information than what was described above. +For example, Wikidata includes statement qualifiers that may modify a statement, such as the fact ``Versailles capital of France'' qualified by ``end time: 5 October 1789.'' +For the sake of simplicity, we limit ourselves to triplets in \(\entitySet\times\relationSet\times\entitySet\). +Further details on the specific knowledge bases can be found in Appendix~\ref{chap:datasets}. + +\subsection{Relation Algebra} +\label{sec:context:relation algebra} +\begin{marginparagraph} + The concept of relation algebra was theorized as a structure for logical systems. + Developed by several famous mathematicians such as Augustus De Morgan, Charles Peirce and Alfred Tarski, it can be used to express \textsc{zfc} set theory. + Here we only use relation algebra as a formal framework to express properties of binary relations. +\end{marginparagraph} +Relations linking two entities from the same set of entities \(\entitySet\) are called binary endorelations. +A relation such as ``\textsl{capital of}'' is a subset of the cartesian square \(\entitySet^2\); it is a set of pairs of entities linked together by this relation. +The set of all possible such relations exhibit a structure called a relation algebra \((2^{\entitySet^2}, \relationAnd, \relationOr, \bar{\ }, \relationZero, \relationOne, \relationComposition, \relationIdentity, \breve{\ })\). +We use it as a formalized system of notation for relation properties. +A relation algebra is defined from: +\begin{itemize} + \item three special relations: + \begin{itemize}[nosep] + \item \(\relationZero\), the empty relation linking no entities together (\(\tripletHolds{e_1}{\relationZero}{e_2}\) is always false); + \item \(\relationOne\), the complete relation linking all entities together (\(\tripletHolds{e_1}{\relationOne}{e_2}\) is always true); + \item \(\relationIdentity\), the identity relation linking all entities to themselves (\(\tripletHolds{e_1}{\relationIdentity}{e_2}\) is true if and only if \(e_1=e_2\)). + \end{itemize} + \item two unary operators: + \begin{itemize}[nosep] + \item the complementary relation \(\bar{r}\) which links together entities not linked by \(r\); + \item the converse \(\breve{r}\) which reverses the direction of the relation such that \(\tripletHolds{e_1}{\breve{r}}{e_2}\) holds if and only if \(\tripletHolds{e_2}{r}{e_1}\) holds. + \end{itemize} + \item three binary operators (in order of lowest precedence, to highest precedence): + \begin{itemize}[nosep] + \item disjunction \(\tripletHolds{e_1}{(r_1\relationOr r_2)}{e_2}\), either \(r_1\) or \(r_2\) link \(e_1\) with \(e_2\); + \item conjunction \(\tripletHolds{e_1}{(r_1\relationAnd r_2)}{e_2}\), both \(r_1\) and \(r_2\) link \(e_1\) with \(e_2\); + \item composition \(\tripletHolds{e_1}{(r_1\relationComposition r_2)}{e_2}\), there exist \(e_3\in\entitySet\) such that both \(\tripletHolds{e_1}{r_1}{e_3}\) and \(\tripletHolds{e_3}{r_2}{e_2}\) hold. + \end{itemize} +\end{itemize} +\begin{marginparagraph} + Note that \(\relationComposition\) composes relations in the opposite order of the function composition \(\circ\). + Indeed while \(f\circ g\) means that \(g\) is applied first, then \(f\) is applied, ``\(\textsl{mother}\relationComposition\textsl{born in}\)'' means that ``\textsl{mother}'' is first applied to the entity, then ``\textsl{born in}'' is applied to the result. 
+\end{marginparagraph} + +Thanks to this framework, we can express several properties on knowledge base relations since \(\relationSet\subseteq 2^{\entitySet^2}\). +For example, the \emph{functional} property can be stated as \(\breve{r}\,\relationComposition\,r\,\relationOr\,\relationIdentity = \relationIdentity\). +A relation \(r\) is functional when for all entities \(e_1\) there is at most one entity \(e_2\) such that \(\tripletHolds{e_1}{r}{e_2}\) holds. +The relation ``\textsl{born in}'' is functional since all entities are either born at a single place or not born at all. +Taking the above definition this means that for all cities \(c\) if we take all entities who were born in \(c\) (\(\breve{r}\,\color{black!30}\relationComposition\,r\,\relationOr\,\relationIdentity = \relationIdentity\)) and then (\(\color{black!30}\breve{r}\,\color{black}\relationComposition\,\color{black!30}r\,\relationOr\,\relationIdentity = \relationIdentity\)) look at where these entities were born (\(\color{black!30}\breve{r}\,\relationComposition\,\color{black}r\,\color{black!30}\relationOr\,\relationIdentity = \relationIdentity\)), we must be back to \(c\) and only c (\(\color{black!30}\breve{r}\,\relationComposition\,r\,\relationOr\,\relationIdentity \color{black}= \relationIdentity\)) or no such \(c\) shall exist (\(\color{black!30}\breve{r}\,\relationComposition\,r\,\color{black}\relationOr\,\relationIdentity \color{black!30}= \relationIdentity\)). +We need to take the disjunction with \(\relationIdentity\) since some entities were not born anywhere, for example \(\tripletHolds{e}{(\breve{r}\relationComposition r)}{e}\) is false when \(r\) is ``\textsl{born in}'' and \(e\) is ``Mount Everest.'' + +Other common properties of binary relations can be defined this way. +One particular property of interest is the restriction of the domain and co-domain of relations. +A lot of relations can only apply to a specific type of entity, such as locations or people. +To express these properties, we use the notation \(\relationOne_X\subseteq \relationOne\) with \(X\subseteq\entitySet\) to refer to the complete relation restricted to entities in \(X\): \(\relationOne_X = \{\, (x_1, x_2) \mid x_1, x_2 \in X \,\}\). +This allows us to define left-restriction (restriction of the domain) and right-restriction (restriction of the co-domain). +Relevant properties are given in Table~\ref{tab:context:relation properties}. + +\begin{margintable}[0mm] + \centering + \input{mainmatter/context/relation properties.tex} + \scaption[Relation properties expressed in relation algebra.]{ + Some fundamental relation properties expressed as conditions in relation algebra. + \label{tab:context:relation properties} + } +\end{margintable} + +Some relation properties recurring in the literature are the cardinality constraints. 
+They can be defined as combinations of the injective and functional properties:
+\begin{description}
+    \item[Many-to-Many] (\(N\to N\)\,) the relation is neither injective nor functional.\\
+        Examples: ``author of,'' ``language spoken,'' ``sibling of.''
+    \item[Many-to-One] (\(N\to 1\)) the relation is functional but it is not injective.\\
+        Examples: ``place of birth,'' ``country.''
+    \item[One-to-Many] (\(1\to N\)\,) the relation is injective but it is not functional.\\
+        Examples: ``contains administrative territorial entity,'' ``has part.''
+    \item[One-to-One] (\(1\to 1\)) the relation is both injective and functional.\\
+        Examples: ``capital,'' ``largest city,'' ``highest point.''
+\end{description}
+
+When a relation \(r\) is one-to-many, its converse \(\breve{r}\) is many-to-one.
+The usual way to design relations in knowledge bases is to use many-to-one relations, making one-to-many relations quite rare in practice.
+Since most systems handle relations in a symmetric fashion, this has little to no effect.
+
+Most of the examples given above are not strictly true.
+A person can be registered as being born both in Paris and in France.
+Some countries do not designate a single capital or share their highest point with a neighbor.
+However, defining these properties is helpful for evaluating the ability of models to capture these kinds of relations.
+To handle such cases, these properties can be seen in a probabilistic way.%
+\sidenote{
+    Given empirical data, the propensity of a relation to be many-to-one can be measured with a conditional entropy \(\entropy(\rndm{e}_2\mid \rndm{e}_1, r)\).
+    An entropy close to zero means the relation tends to be many-to-one.
+}
+
+We use the notations from relation algebra to formalize assumptions made on the structure of knowledge bases.
+For example, several models assume that \(\forall r_1, r_2\in\relationSet: r_1\relationAnd r_2 = \relationZero\), that is, all pairs of entities are linked by at most one relation.
+A list of common assumptions is provided in Appendix~\ref{chap:assumptions}; it should prove useful from Chapter~\ref{chap:relation extraction} onwards.
+For readers unfamiliar with relation algebra notations, we provide detailed explanations of complex formulae in the margins throughout this thesis.
+
+\subsection[Distributed Representation through Knowledge Base Completion]{Distributed Representation through Knowledge\\Base Completion}
+\label{sec:context:knowledge base completion}
+One problem with knowledge bases is that they are usually incomplete.
+However, given some information about an entity, it is usually possible to infer additional facts about this entity.
+This is called \emph{knowledge base completion}.
+Sometimes this inference is deterministic.
+For example, if two entities have the same two parents, we can infer that they are siblings.
+Quite often, this reasoning is probabilistic.
+For example, the head of state of a country usually lives in this country's capital; this probability can be further increased by facts indicating that previous heads of state died in the capital, etc.
+
+The task of knowledge base completion is essential for our work for two reasons.
+First of all, it is the standard approach to obtain a distributed representation of knowledge base objects.
+Second, the models used to tackle this task are often reused as part of relation extraction systems; this is the case of all approaches presented in this section.
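Before turning to these models, here is a minimal Python sketch of the deterministic kind of completion mentioned above (toy facts and illustrative relation names, not taken from any actual knowledge base); the same sibling-of inference is written formally as a logical rule just below.

# Toy facts as (head, relation, tail) triplets; the relation names are only illustrative.
facts = {
    ("Elizabeth II", "parent of", "Charles III"),
    ("Elizabeth II", "parent of", "Anne"),
}

def infer_siblings(facts):
    """Deterministic completion: two distinct children of a common parent are siblings."""
    inferred = set()
    for (p1, r1, c1) in facts:
        for (p2, r2, c2) in facts:
            if r1 == r2 == "parent of" and p1 == p2 and c1 != c2:
                inferred.add((c1, "sibling of", c2))
    return inferred

print(infer_siblings(facts))  # two symmetric "sibling of" facts are inferred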
+
+We define two sub-tasks of knowledge base completion: \emph{relation prediction} and \emph{entity prediction}.%
+\sidenote[][-6.5mm]{In the literature, both of these tasks can be called ``link prediction'' and ``knowledge graph completion.''}
+In the relation prediction task, the goal is to predict the relation between two entities (\tripletHolds{e_1}{?}{e_2}), while entity prediction focuses on predicting a missing entity in a triplet (\tripletHolds{e_1}{r}{?} or \tripletHolds{?}{r}{e_2}).
+Historically, this was performed using symbolic approaches.
+\begin{marginparagraph}[-7mm]
+    Relation prediction is quite similar to our task of interest: relation extraction.
+    The main difference is that relation prediction is defined on knowledge bases, while relation extraction takes natural language inputs.
+    This parallel is exploited by the model presented in Chapter~\ref{chap:fitb}.
+\end{marginparagraph}
+For example, this task can be tackled using an inference engine relying on a human expert inputting logical rules such as:
+\begin{equation*}
+    \sfTripletHolds{e_1}{parent of}{e_2} \land \sfTripletHolds{e_1}{parent of}{e_3} \land e_2\neq e_3 \iff \sfTripletHolds{e_2}{sibling of}{e_3},
+\end{equation*}
+or using the relation algebra notation introduced in Section~\ref{sec:context:relation algebra}:
+\begin{equation*}
+    \widebreve{\textsl{parent of}} \,\relationComposition\, \textsl{parent of} \,\relationAnd\,\bar{\relationIdentity} = \textsl{sibling of}.
+\end{equation*}
+\begin{marginparagraph}[-12mm]
+    \tripletHolds{e_2}{\widebreve{\textsl{parent of}}}{e_1} means that \(e_1\) is a parent of \(e_2\).
+    Adding a composition to this, \tripletHolds{e_2}{\widebreve{\textsl{parent of}} \,\relationComposition\, \textsl{parent of}}{e_3} means that the aforementioned \(e_1\) has a child \(e_3\).
+    This child \(e_3\) could be the same as \(e_2\), which is why we take the conjunction with the complement of the identity relation \(\relationAnd\bar{\relationIdentity}\), thus obtaining the relation \textsl{sibling of}.
+\end{marginparagraph}
+However, listing all possible logical implications is not feasible.
+As in \textsc{nlp}, another approach to tackle this problem is to leverage distributed representations.
+Some good early results were obtained by \textsc{rescal}, which we present in Section~\ref{sec:context:rescal}.
+But the problem started to gather a lot of interest in the deep learning community with TransE (Section~\ref{sec:context:transe}), which encodes relations as translations in the semantic space.
+This was followed by several other approaches that encoded relations as other kinds of geometric transformations.
+All the models presented in this section assume that the entities are embedded in a latent semantic space \(\symbb{R}^d\) with a matrix \(\mtrx{U}\in\symbb{R}^{\entitySet\times d}\), where \(d\) is a hyperparameter.
+
+\subsubsection{Selectional Preferences}
+\label{sec:context:selectional preferences}
+Selectional preferences is a simple formalism that purposes to encode each relation with two linear maps assessing the predisposition of an entity to appear as the head or tail of a relation in a true fact.
+This can be done using an energy formalism, where the energy of a fact is defined as:
+\begin{equation}
+    \psi_\textsc{sp}(e_1, r, e_2) = \vctr{u}_{e_1}\transpose \vctr{a}_r + \vctr{u}_{e_2}\transpose \vctr{b}_r
+\end{equation}
+with \(\mtrx{A}, \mtrx{B}\in\symbb{R}^{\relationSet\times d}\) two matrices encoding the preferences of each relation for certain entities.
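As a rough illustration, here is a minimal numpy sketch of this energy, with small random matrices standing in for the learned parameters U, A and B; the softmax normalization over relations is introduced next.

import numpy as np

rng = np.random.default_rng(0)
n_entities, n_relations, d = 5, 3, 4

U = rng.normal(size=(n_entities, d))   # entity embeddings u_e
A = rng.normal(size=(n_relations, d))  # head preferences a_r
B = rng.normal(size=(n_relations, d))  # tail preferences b_r

def energy_sp(e1, r, e2):
    """psi_sp(e1, r, e2) = u_e1 . a_r + u_e2 . b_r"""
    return U[e1] @ A[r] + U[e2] @ B[r]

# Relation prediction: score every relation for a fixed entity pair (higher is better).
scores = np.array([energy_sp(0, r, 1) for r in range(n_relations)])
print(scores.argmax())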
+This energy function can then be used to define the probability that a fact holds using a softmax:
+\begin{equation}
+    P(e_1, r, e_2) \propto \exp \psi_\textsc{sp}(e_1, r, e_2).
+    \label{eq:context:sp softmax}
+\end{equation}
+This is sufficient for entity and relation prediction, as we can usually compute the partition function over the set of all entities or relations.
+If this is not feasible, a technique such as \textsc{nce} (Section~\ref{sec:context:nce}) or negative sampling (Section~\ref{sec:context:negative sampling}) can be used to approximate Equation~\ref{eq:context:sp softmax}.
+Still, selectional preferences do not encode the interaction of the head and tail entities.
+As such, the model is quite weak for entity prediction, and more expressive models are needed.
+
+\subsubsection{\textsc{rescal}}
+\label{sec:context:rescal}
+\textsc{rescal}~\parencitex{rescal} purposes to model relations by a bilinear form \(\entitySet\times\entitySet\mapsto\symbb{R}\) in the semantic space of entities.
+In other words, each relation \(r\in\relationSet\) is represented by a matrix \(\mtrx{C}_r\in\symbb{R}^{d\times d}\) with the training algorithm seeking to enforce the following property:
+\begin{equation*}
+    \vctr{u}_{e_1}\transpose \mtrx{C}_r \vctr{u}_{e_2} =
+    \begin{cases}
+        1 & \quad \text{if \tripletHolds{e_1}{r}{e_2} holds} \\
+        0 & \quad \text{otherwise.}
+    \end{cases}
+\end{equation*}
+This can be seen as trying to factorize the tensor of facts \(\tnsr{X}\) as \(\mtrx{U}\tnsr{C}\mtrx{U}\transpose\), where \(\tnsr{X}\in\{0,1\}^{\entitySet\times\relationSet\times\entitySet}\) with \(x_{e_1re_2}=1\) if \tripletHolds{e_1}{r}{e_2} holds and \(x_{e_1re_2}=0\) otherwise.
+The parameters of the model, \(\mtrx{U}\) and \(\tnsr{C}\), are trained using an alternating least-squares approach, minimizing a regularized reconstruction loss:
+\begin{equation}
+    \symcal{L}_\textsc{rescal}(\tnsr{X}; \mtrx{U}, \tnsr{C}) = \frac{1}{2} \sum_{\substack{e_1,e_2\in\entitySet\\r\in\relationSet}} (x_{e_1re_2} - \vctr{u}_{e_1}\transpose \mtrx{C}_r \vctr{u}_{e_2})^2 + \frac{1}{2} \lambda ( \|\mtrx{U}\|_F^2 + \sum_{r\in\relationSet} \|\mtrx{C}_r\|_F^2 )
+\end{equation}
+
+Using bilinear forms allows \textsc{rescal} to capture entity interactions for each relation in a simple manner.
+However, the number of parameters to estimate grows quadratically with respect to the dimension of the semantic space \(d\).
+This can be prohibitive as a large \(d\) is needed to ensure accurate modeling of the entities.
+
+\subsubsection{TransE}
+\label{sec:context:transe}
+To find a balance between the number of parameters and the expressiveness of the model, geometric approaches were developed, starting with TransE~\parencitex{transe}.
+TransE proposes to leverage the regularity exhibited by Figure~\ref{fig:context:word2vec pca} to embed both entities and relations in the same vector space.
+Formally, its assumption is that relations can be represented as translations between entities' embeddings.
+In addition to representing each entity \(e\) by an embedding \(\vctr{u}_e\in\symbb{R}^d\), each relation \(r\) is also embedded as a translation \(\vctr{v}_r\in\symbb{R}^d\) in the same space.
+The idea is that if \tripletHolds{e_1}{r}{e_2} holds, then \(\vctr{u}_{e_1} + \vctr{v}_r \approx \vctr{u}_{e_2}\).
+The authors argue that translations can represent hierarchical data by drawing a parallel with the embedding of a tree in a Euclidean plane---that is, the usual representation of a tree as drawn on paper.
+As long as the distance between two levels in the tree is large enough, the children of a node are close together; this not only allows for the representation of one-to-many relations ``child'' but also for the many-to-many, symmetric and transitive relation ``sibling'' as the null translation. + +In order to enforce the translation property, a margin-based loss is used to train an energy-based model. +The energy of true triplets drawn from the knowledge base is minimized, while negative triplets are sampled and have their energy maximized up to a certain margin. +Given a positive triplet \((e_1, r, e_2)\) and a negative triplet \((e_1', r, e_2')\), the TransE loss can be expressed as: +\begin{equation} + \symcal{L}_\textsc{te}(e_1, r, e_2, e_1', e_2') = \max\left(0, \gamma + \Delta(\vctr{u}_{e_1} + \vctr{v}_r, \vctr{u}_{e_2}) - \Delta(\vctr{u}_{e_1'} + \vctr{v}_r, \vctr{u}_{e_2'})\right), + \label{eq:context:transe loss} +\end{equation} +where \(\Delta\) is a distance function such as the squared Euclidean distance \(\Delta(\vctr{u}_{e_1} + \vctr{v}_r, \vctr{u}_{e_2}) = \|\vctr{u}_{e_1} + \vctr{v}_r - \vctr{u}_{e_2} \|_2^2\). +The negative triplets \((e_1', r, e_2')\) are sampled by replacing one of the two entities of \((e_1, r, e_2)\) by a random one which is sampled uniformly over all possible entities: +\begin{equation*} +\begin{split} + N(e_1, e_2) = & + \begin{cases} + (e_1, e') & \text{ with probability } 50\% \\ + (e', e_2) & \text{ with probability } 50\% \\ + \end{cases} \\ + & \text{with } e' \sim \uniformDistribution(\entitySet). +\end{split} +\end{equation*} + +Since \(d\) is a distance, when the loss \(\symcal{L}_\textsc{te}\) is perfectly minimized, the positive part \(+\Delta(\vctr{u}_{e_1} + \vctr{v}_r, \vctr{u}_{e_2})\) is 0. +This means that the negative part \(-\Delta(\vctr{u}_{e_1'} + \vctr{v}_r, \vctr{u}_{e_2'})\) contributes to the loss only when it is smaller than the margin \(\gamma\). +Since this criterion depends on the distance between entities, it can easily be optimized by increasing the entity embeddings norms. +To avoid this degenerate solution, the entity embeddings are renormalized at each training step. +The training loop and initialization procedure are detailed in Algorithm~\ref{alg:context:transe}. +Parameters \(\mtrx{U}\) and \(\mtrx{V}\) are optimized by stochastic gradient descent with early-stopping based on validation performance. +\begin{marginalgorithm}[-1cm] + \input{mainmatter/context/transe.tex} + \scaption[The TransE training algorithm.]{ + The TransE training algorithm. + The relations are initialized randomly on the sphere but are free to drift away afterward, while entities are renormalized at each iteration. + The loop updates parameters \(\mtrx{U}\) and \(\mtrx{V}\) using gradient descent and is stopped based on validation score. + The gradient of \(\symcal{L}_\textsc{te}\) is computed from Equation~\ref{eq:context:transe loss}. + \label{alg:context:transe} + } +\end{marginalgorithm} + +\paragraph{Evaluation} +The quality of the embeddings can be evaluated by measuring the accuracy of entity prediction based on them. +Given a true triplet \((e_1, r, e_2)\in\kbSet\), the energy \(\Delta(\vctr{u}_{e'} + \vctr{v}_r, \vctr{u}_{e_2})\) is computed for all possible entities \(e'\in\entitySet\). +The entity minimizing the energy is predicted as completing the triplet. +The same procedure is then applied on \(e_2\). 
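A minimal numpy sketch of this ranking procedure, taking Δ to be the squared Euclidean distance and using random toy matrices in place of the trained embeddings U and V:

import numpy as np

rng = np.random.default_rng(0)
n_entities, n_relations, d = 100, 10, 20
U = rng.normal(size=(n_entities, d))   # entity embeddings
V = rng.normal(size=(n_relations, d))  # relation translations

def head_rank(e1, r, e2):
    """Rank of the true head e1 among all candidates e', ordered by
    the energy ||u_e' + v_r - u_e2||^2 (rank 1 = lowest energy)."""
    energies = np.sum((U + V[r] - U[e2]) ** 2, axis=1)
    return int(np.sum(energies < energies[e1])) + 1

def tail_rank(e1, r, e2):
    """Same procedure on the tail side, with energies ||u_e1 + v_r - u_e'||^2."""
    energies = np.sum((U[e1] + V[r] - U) ** 2, axis=1)
    return int(np.sum(energies < energies[e2])) + 1

# Averaging these two ranks over a set of test triplets gives the mean rank metric.
print(head_rank(3, 2, 7), tail_rank(3, 2, 7))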
+The correct entity minimizes the energy quite rarely, therefore in order to have a more informative score \textcite{transe} reports the mean rank of the correct entity among all the entities ranked by the energy of their associated triplets. +For reference, on WordNet, the mean rank of the correct entity is 263 among 40\,943 entities. + +When expanding the expression \(\Delta(\vctr{u}_{e_1} + \vctr{v}_r, \vctr{u}_{e_2})\) where \(d\) is the Euclidean distance, the main term ends up being \(\vctr{u}_{e_1}\transpose\vctr{u}_{e_2} + \vctr{v}_r\transpose (\vctr{u}_{e_2} - \vctr{u}_{e_1})\). +As such, TransE captures all 2-way interactions between \(e_1\), \(r\) and \(e_2\). +However, this means that 3-way interactions are not captured, this is however standard in information extraction. +Furthermore, TransE is unable to model several symmetric relations (when \(r=\breve{r}\)). +To solve these problems, other geometric transformations were proposed to improve TransE expressiveness, such as first projecting entities on a hyperplane (TransH, \cite{transh}) or having the entities and relations live in different spaces (TransR, \cite{transr}). +Finally, all the methods mentioned in this section are not only useful for entity and relation predictions, but also as methods to obtain distributed representations of knowledge bases entities and relations. +The matrices \(\mtrx{U}\) and \(\mtrx{V}\) learned by TransE can subsequently be used for other tasks involving knowledge bases, in the same way that transfer learning is used to obtain distributed representations of text using language models (Section~\ref{sec:context:transfer learning}). diff --git a/mainmatter/context/lstm.tex b/mainmatter/context/lstm.tex @@ -0,0 +1,78 @@ +\begin{tikzpicture}[ + inval/.style={shape=circle,draw=black,inner sep=0,minimum size=16}, + mult/.style={shape=circle,draw=black,inner sep=0,minimum size=6,path picture={ + \draw[black,thick,line cap=round] (45:0.075) -- (225:0.075) (135:0.075) -- (315:0.075); + }}, + sigm/.style={shape=circle,draw=black,inner sep=0,minimum size=16,path picture={ + \draw[step=0.05,very thin,color=gray!20] (-1,-1) grid (1,1); + \draw[color=gray!80] (-1,-0.075) -- (1,-0.075) (0,-1) -- (0,1); + \draw[thick] (-1, -0.075) -- (-0.15,-0.075) edge[thick,out=360,in=180,looseness = 1] (0.15,0.15) (0.15,0.15) -- (1, 0.15); + }}, + tanh/.style={shape=circle,draw=black,inner sep=0,minimum size=16,path picture={ + \draw[step=0.05,very thin,color=gray!20] (-1,-1) grid (1,1); + \draw[color=gray!80] (-1,0) -- (1,0) (0,-1) -- (0,1); + \draw[thick] (-1, -0.15) -- (-0.15,-0.15) edge[out=360,in=180,looseness=1] (0.15,0.15) (0.15,0.15) -- (1, 0.15); + }}, + tanhd/.style={shape=circle,draw=black,inner sep=0,minimum size=16,path picture={ + \draw[step=0.05,very thin,color=gray!4] (-1,-1) grid (1,1); + \draw[color=gray!16] (-1,0) -- (1,0) (0,-1) -- (0,1); + \draw[thick] (-1, -0.15) -- (-0.15,-0.15) edge[out=360,in=180,looseness=1] (0.15,0.15) (0.15,0.15) -- (1, 0.15); + }}, + ] + \node (x) at (-4.2, 0) {\(\vctr{x}_t, \vctr{h}_{t-1}\)}; + \node (h) at (3.5, 0) {\(\vctr{h}_t\)}; + + \node[inval,very thick] (c) at (0, 0) {\(\vctr{c}_t\)}; + \node[sigm] (i) at (-1.25, 1.5) {}; + \node[sigm] (o) at (2.5, 1.5) {}; + \node[sigm] (f) at (0, -1.5) {}; + + \node[mult] (i_mult) at (-1.25, 0) {}; + \node[mult] (o_mult) at (2.5, 0) {}; + \node[mult] (f_mult) at (0, -0.75) {}; + + \node[tanh] (i_act) at (-2.5, 0) {}; + \node[tanh] (c_act) at (1.25, 0) {}; + + \node[above] at (c.north) {\tiny Cell}; + \node[left,xshift=1mm] at 
(i.west) {\tiny Input Gate}; + \node[left] at (o.west) {\tiny Output Gate}; + \node[left] at (f.west) {\tiny Forget Gate}; + + \draw + let \p1 = (o.north) in + let \p2 = (o.east) in + let \p3 = (f.south) in + let \p4 = (i_act.west) in + (\x4, \y3) rectangle (\x2, \y1); + + \draw[arrow] (x) -- (i_act); + \draw (-3.5,0) .. controls ++(0:1) and ++(0:-1) .. (-2.5, 2.2); + \draw (-3.5,0) .. controls ++(0:1) and ++(0:-1) .. (-2.5, -2.2); + + \draw (-2.5,-2.2) -- (-0.3,-2.2); + \draw[arrow] (-0.3,-2.2) to[out=0,in=-90] (f); + + \draw (-2.5,2.2) -- (2.2,2.2); + \draw[arrow] (-1.55,2.2) to[out=0,in=90] (i); + \draw[arrow] (2.2,2.2) to[out=0,in=90] (o); + + \draw[arrow] (o_mult) -- (h); + + \draw[arrow] (i_act) -- (i_mult); + \draw[arrow] (i) -- (i_mult) node[pos=0.4,left=-0.1] {\(\vctr{i}_t\)}; + \draw[arrow] (i_mult) -- (c); + \draw[arrow] (c) -- (i); + \draw[arrow] (c) -- (o); + \draw[arrow] (c) -- (c_act); + \draw[arrow] (c_act) -- (o_mult); + \draw[arrow] (o) -- (o_mult) node[pos=0.4,left=-0.1] {\(\vctr{o}_t\)}; + \draw[arrow] (f) -- (f_mult) node[pos=0.4,right=-0.05] {\(\vctr{f}_t\)}; + + \draw (c) edge [arrow, out=300, in=50] (f_mult.north east); + \draw (f_mult.north west) edge [arrow, out=130, in=250] (c); + + \node at (1.8,-1.25) {\LARGE LSTM}; + + \draw (c) edge [arrow, out=210, in=150] (f); +\end{tikzpicture}% diff --git a/mainmatter/context/memory network lm.tex b/mainmatter/context/memory network lm.tex @@ -0,0 +1,19 @@ +\begin{tikzpicture} + \memorynetwork{(1.25,-0.5)}{1}[1]; + \memorynetwork{(1.25,1.35)}{2}[2]; + + \node[below of=q1] (qh) {\(0.1\)}; + \node (wi) at (2.5, 3) {\(\hat{w}_t\)}; + \node (xim1) at (1.5, 3) {\(\vctr{x}_{t-1}\)}; + \node at (0.5, 3) {\(\cdots\)}; + \node (x1) at (-0.5, 3) {\(\vctr{x}_1\)}; + + \draw[arrow] (qh) -- (q1); + \draw[arrow] (o1) -- (q2); + \draw[arrow] (o2) -- (wi); + + \draw[memorybrace] (xim1.south east) -- (x1.south west) coordinate[midway,yshift=-5] (ylti); + + \draw[arrow,rounded corners=5pt] (ylti) -- (ylti|-m1) -- (m1); + \draw[arrow,rounded corners=5pt] (ylti) -- (ylti|-m2) -- (m2); +\end{tikzpicture}% diff --git a/mainmatter/context/relation properties.tex b/mainmatter/context/relation properties.tex @@ -0,0 +1,12 @@ +\begin{tabular}{@{}l l@{}} + \toprule + Property & Condition \\ + \midrule + Injective & \(r\relationComposition\breve{r} \relationOr \relationIdentity = \relationIdentity\) \\ + Functional & \(\breve{r} \relationComposition r \relationOr \relationIdentity = \relationIdentity\) \\ + Symmetric & \(r = \breve{r}\) \\ + Transitive & \(r\relationComposition r \relationOr r = r\) \\ + Left-restriction & \(r\relationComposition \breve{r} \relationOr 1_X = 1_X\) \\ + Right-restriction & \(\breve{r}\relationComposition r \relationOr 1_X = 1_X\) \\ + \bottomrule +\end{tabular} diff --git a/mainmatter/context/rnn lm.tex b/mainmatter/context/rnn lm.tex @@ -0,0 +1,21 @@ +\begin{tikzpicture}[ltt/.style={out=60,in=-100,looseness=1.5}] + \useasboundingbox (-24.5mm, 38mm) rectangle (24.5mm, -2mm); + \foreach \t/\n/\x/\focus in {t-1/t/-1.9/\transparencyDefault, t/t+1/0/1, t+1/t+2/1.9/\transparencyDefault}{ + \begin{scope}[opacity=\focus] + \node (x\t) at (\x, 0mm) {\(\vctr{x}_{\t}\)}; + \node[draw] (rnn\t) at (\x, 9mm) {Linear}; + \node (h\t) at (\x, 18mm) {\(\vctr{h}_{\t}\)}; + \node[draw] (lin\t) at (\x, 27mm) {Linear}; + \node (o\t) at (\x, 36mm) {\(\hat{w}_{\n}\)}; + + \draw[arrow] (x\t) -- (rnn\t); + \draw[arrow] (rnn\t) -- (h\t); + \draw[arrow] (h\t) -- (lin\t); + \draw[arrow] (lin\t) -- (o\t); + \end{scope} + } + + \node at (-1.9, 
18mm) {\(\vctr{h}_{t-1}\)}; + \draw[arrow] (ht-1) to[ltt] ($(rnnt.south) + (-3mm, 0)$); + \draw[arrow,opacity=\transparencyDefault] (ht) to[ltt] ($(rnnt+1.south) + (-3mm, 0)$); +\end{tikzpicture}% diff --git a/mainmatter/context/sentence.tex b/mainmatter/context/sentence.tex @@ -0,0 +1,341 @@ +\section{Distributed Representation of Sentences} +\label{sec:context:sentence} +Most \textsc{nlp} tasks are tackled at the sentence level. +In the previous section, we saw how to obtain representations of words. +We now focus on how to aggregate these word representations in order to process whole sentences. +Henceforth, given a sentence of length \(m\), we assume symbolic words \(\vctr{w}\in V^m\) are embedded as \(\mtrx{X}\in\symbb{R}^{m\times d}\) in a vector space of dimension \(d\). +This can be achieved through the use of an embedding matrix \(\mtrx{U}\in\symbb{R}^{V\times d}\) such as the one provided by word2vec. + +An early approach to sentence representation was to use \emph{bag-of-words}, that is to simply ignore the ordering of the words. +In this section, we focus on more modern, deep learning approaches. +Section~\ref{sec:context:cnn} presents \textsc{cnn}s, which process fixed-length sequences of words to produce representations of sentences. +We then focus on \textsc{rnn}s in Section~\ref{sec:context:rnn}, a method to get representations of sentences through a causal language model. +\textsc{rnn}s can be improved by an attention mechanism as explained in Section~\ref{sec:context:attention}. +Finally, we present transformers in Section~\ref{sec:context:transformers}, which build upon the concept of attention to extract state-of-the-art contextualized word representations. + +\subsection{Convolutional Neural Network} +\label{sec:context:cnn} +\begin{marginfigure}[-45mm] + \centering + \input{mainmatter/context/cnn.tex} + \scaption[Architecture of a single convolutional filter with a pooling layer.]{ + Architecture of a single convolutional filter with a pooling layer. + The filter is of width 3, which means it works on trigrams. + A single filter (the \(i\)-th) is shown here; this is repeated \(d'\) times, meaning that \(\vctr{h}_t,\vctr{o}\in\symbb{R}^{d'}\).} + \label{fig:context:cnn} +\end{marginfigure} + +Convolutional neural networks (\textsc{cnn}) can be used to build the representation of a sentence from the representation of its constituent words~\parencite{unified_nlp,cnn_classification}. +These word embeddings can come from word2vec (Section~\ref{sec:context:word2vec}) or can be learned using a \textsc{cnn} with a language model objective (Section~\ref{sec:context:language model}), the latter being the original approach proposed by \textcitex{unified_nlp}. + +The basic idea behind \textsc{cnn} is to recognize patterns in a position-invariant fashion~\parencite{tdnn}. +This is applicable to natural language following the principle of compositionality: the words composing an expression and the rules used to combine them determine its meaning, with little influence from the location of the expression in the text. +So, given a sequence of \(d\)-dimensional embeddings \(\vctr{x}_1, \dotsc, \vctr{x}_m\in\symbb{R}^d\), a one-dimensional \textsc{cnn} works on the \(n\)-grams of the sequence, that is the subwords% +\sidenote{Here we use \emph{subwords} in its formal language theory meaning.
In the simple setting where we deal with words in a sentence, this \emph{subword} actually designates a sequence of consecutive words.} +\(\vctr{x}_{t:t+n-1} = (\vctr{x}_t, \dotsc, \vctr{x}_{t+n-1})\) of length \(n\). +The basic design of a \textsc{cnn} is illustrated in Figure~\ref{fig:context:cnn}. +A convolutional layer is parametrized by \(d'\) filters \(\mtrx{W}^{(i)}\in\symbb{R}^{n\times d}\) of width \(n\) and a bias \(b^{(i)}\in\symbb{R}\). +The \(t\)-th output of the \(i\)-th filter layer is defined as: +\begin{equation} + h^{(i)}_t = f(\mtrx{W}^{(i)} * \vctr{x}_{t:t+n-1} + b^{(i)}) + \label{eq:context:convolution} +\end{equation} +where \(*\) is the convolution operator% +\sidenote{ + Usually, a cross-correlation operator is actually used, which is equivalent up to a mirroring of the filters when they are real-valued. +} +and \(f\) is a non-linear function. +As is usual with neural networks, several layers of this kind can be stacked. +To obtain a fixed-size representation---which does not depend on the length of the sequence \(m\)---a pooling layer can be used. +Most commonly, max-over-time pooling~\parencite{maxpool}, which simply takes the maximum activation over time---that is sequence length---for each feature \(i=1, \dotsc, d'\). + +In the same way that word2vec produces a real vector space where words with similar meanings are close to each other, the sentence representations \(\vctr{o}\) extracted by a \textsc{cnn} tend to be close to each other when the sentences convey similar meanings. +This is somewhat dependent on the task on which the \textsc{cnn} is trained. +However, the purpose of \textsc{cnn} is usually to extract the semantics of a sentence, and the nature of most tasks makes it so that sentences with similar meanings should have similar representations. + +\subsection{Recurrent Neural Network} +\label{sec:context:rnn} +A limitation of \textsc{cnn}s is the difficulty they have modeling patterns of non-adjacent words. +A second approach to process whole sentences is to use recurrent neural networks (\textsc{rnn}). +\textsc{rnn}s purpose to sum up an entire sentence prefix into a fixed-size hidden state, updating this hidden state as the sentence is processed. +This can be used to build a causal language model following the decomposition of Equation~\ref{eq:context:causal lm}. +As showcased by Figure~\ref{fig:context:rnn}, the hidden state \(\vctr{h}_t\) can be used to predict the next word \(w_{t+1}\) with a simple linear layer followed by a softmax, formally: +\begin{marginfigure} + \centering + \input{mainmatter/context/rnn lm.tex} + \scaption{\textsc{rnn} language model unrolled through time.} + \label{fig:context:rnn} +\end{marginfigure} +\begin{align} + \vctr{h}_t & = f(\mtrx{W}^{(x)} \vctr{x}_t + \mtrx{W}^{(h)} \vctr{h}_{t-1} + \vctr{b}^{(h)}) + \label{eq:context:rnn} \\ + \hat{w}_t & = \softmax(\mtrx{W}^{(o)} \vctr{h}_t + \vctr{b}^{(o)}) + \nonumber +\end{align} +where \(\mtrx{W}^{(x)}\), \(\mtrx{W}^{(h)}\), \(\mtrx{W}^{(o)}\), \(\vctr{b}^{(h)}\) and \(\vctr{b}^{(o)}\) are model parameters and \(f\) is a non-linearity, usually a sigmoid \(f(x) = \sigmoid(x) = \frac{1}{1 + \symup{e}^{-x}}\). +This model is usually trained by minimizing the negative log-likelihood: +\marginnote{ + We generally use \(\vctr{\theta}\) to refer to the set of model parameters. + In this case \(\vctr{\theta} = \{\mtrx{W}^{(x)}, \mtrx{W}^{(h)}, \mtrx{W}^{(o)}, \vctr{b}^{(h)}, \vctr{b}^{(o)}\}\). 
+}[1cm] +\begin{equation*} + \loss{rnn}(\vctr{\theta}) = \sum_{t=1}^m - \log P(w_t\mid \vctr{x}_1, \dotsc, \vctr{x}_{t-1}; \vctr{\theta}) +\end{equation*} +using the backpropagation-through-time algorithm. +The gradient is run through all the steps of the \textsc{rnn} until reaching the beginning of the sequence. +When the sequence is a sentence, this can easily be achieved. +However, when longer spans of text are considered, the gradient only goes back a fixed number of tokens in order to limit memory usage. + +\subsubsection{Long Short-term Memory} +\label{sec:context:lstm} +Standard \textsc{rnn}s tend to have a hard time dealing with long sequences. +This problem is linked to the vanishing and exploding gradient problems. +When the gradient goes through several non-linearities, it tends to be less meaningful, and gradient descent does not lead to satisfactory parameters anymore. +In particular, when \(\mtrx{W}^{(h)}\) has a large spectral norm, the values \(\vctr{h}_t\) tend to get bigger and bigger with long sequences; on the other hand, when its spectral norm is small, these values get smaller and smaller. +When \(\vctr{h}_t\) has a large magnitude, the sigmoid activation saturates and \(\frac{\partial \loss{rnn}}{\partial \vctr{h}_t}\) gets close to zero: the gradient vanishes. +\textsc{rnn} variants are used to alleviate this vanishing gradient problem, the most common being long short-term memory (\textsc{lstm}, \citex{lstm}). +\begin{figure}[ht!] + \centering + \input{mainmatter/context/lstm.tex} + \scaption[Architecture of an \textsc{lstm} cell.]{ + \label{fig:context:lstm cell} + Architecture of an \textsc{lstm} cell. + In its simplest form, this block replaces the linear layer at the bottom of Figure~\ref{fig:context:rnn}. + The link between \(\vctr{c}_t\) and \(\vctr{c}_{t-1}\) is illustrated by a self-loop but could be seen as an additional input and output. + } +\end{figure} + +\textsc{lstm}s redefine the recurrence of \textsc{rnn}s (Equation~\ref{eq:context:rnn}) by adding multiplicative gates as illustrated by Figure~\ref{fig:context:lstm cell}. +The resulting unit is governed by the following set of equations: +\begin{equation*} +\def\arraystretch{1.25} +\begin{array}{l l l l} + \vctr{x}'_t & = & \begin{bmatrix}\vctr{x}_t\\ \vctr{h}_{t-1}\end{bmatrix} & \text{Recurrent input}\\[3.9mm] + \tilde{\vctr{c}}_t & = & \tanh(\mtrx{W}^{(c)} \vctr{x}'_t + \vctr{b}^{(c)}) & \text{Cell candidate}\\ + \vctr{i}_t & = & \sigmoid(\mtrx{W}^{(i)} \vctr{x}'_t + \mtrx{U}^{(i)} \vctr{c}_{t-1} + \vctr{b}^{(i)}) & \text{Input gate}\\ + \vctr{f}_t & = & \sigmoid(\mtrx{W}^{(f)} \vctr{x}'_t + \mtrx{U}^{(f)} \vctr{c}_{t-1} + \vctr{b}^{(f)}) & \text{Forget gate}\\ + \vctr{c}_t & = & \vctr{i}_t\odot \tilde{\vctr{c}}_t + \vctr{f}_t\odot \vctr{c}_{t-1} & \text{New cell}\\ + \vctr{o}_t & = & \sigmoid(\mtrx{W}^{(o)} \vctr{x}'_t + \mtrx{U}^{(o)} \vctr{c}_t + \vctr{b}^{(o)}) & \text{Output gate}\\ + \vctr{h}_t & = & \vctr{o}_t\odot \tanh(\vctr{c}_t) & \text{Hidden layer output}\\ +\end{array} +\end{equation*} +\marginnote{ + \(\odot\) is the element-wise multiplication and \(\sigmoid\) the sigmoid function. +}[-18mm] +\marginnote{ + As with \textsc{rnn}, \(\vctr{\theta} = \{ \mtrx{W}^{(c)}, \mtrx{W}^{(i)}, \mtrx{U}^{(i)},\\\mtrx{W}^{(f)}, \mtrx{U}^{(f)}, \mtrx{W}^{(o)}, \mtrx{U}^{(o)}, \vctr{b}^{(c)}, \vctr{b}^{(f)}, \vctr{b}^{(i)},\\\vctr{b}^{(o)} \}\) are model parameters. +}[-8mm] + +The main peculiarity of \textsc{lstm} is the presence of multiple gates used as masks or mixing factors in the unit; one step of this recurrence is sketched in the listing below.
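A minimal NumPy sketch of a single step of this recurrence (the parameter dictionary p and its key names are illustrative assumptions, not notation used elsewhere in this thesis):

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def lstm_step(x_t, h_prev, c_prev, p):
    # One LSTM step following the equations above; p holds the parameters.
    xp = np.concatenate([x_t, h_prev])                      # recurrent input x'_t
    c_tilde = np.tanh(p["Wc"] @ xp + p["bc"])               # cell candidate
    i = sigmoid(p["Wi"] @ xp + p["Ui"] @ c_prev + p["bi"])  # input gate
    f = sigmoid(p["Wf"] @ xp + p["Uf"] @ c_prev + p["bf"])  # forget gate
    c = i * c_tilde + f * c_prev                            # new cell
    o = sigmoid(p["Wo"] @ xp + p["Uo"] @ c + p["bo"])       # output gate
    h = o * np.tanh(c)                                      # hidden layer output
    return h, c

# Toy usage with input dimension 3 and hidden dimension 2.
rng = np.random.default_rng(0)
dx, dh = 3, 2
p = {name: rng.normal(size=(dh, dx + dh)) for name in ("Wc", "Wi", "Wf", "Wo")}
p.update({name: rng.normal(size=(dh, dh)) for name in ("Ui", "Uf", "Uo")})
p.update({name: np.zeros(dh) for name in ("bc", "bi", "bf", "bo")})
h, c = lstm_step(rng.normal(size=dx), np.zeros(dh), np.zeros(dh), p)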
+\textsc{lstm} units are interpreted as having an internal cell memory \(\vctr{c}_t\) which is an additional (internal) state alongside \(\vctr{h}_t\) and is used as input of the cell alongside \(\vctr{x}_t\) and \(\vctr{h}_{t-1}\). +When computing its activation, we first compute a cell candidate \(\tilde{\vctr{c}}_t\) which is the potential successor to \(\vctr{c}_t\). +Then, the multiplicative gates come into play, the cell \(\vctr{c}_t\) is partially updated with a mix of \(\vctr{c}_{t-1}\) and \(\tilde{\vctr{c}}_t\) controlled by the input and forget gates \(\vctr{i}_t\) and \(\vctr{f}_t\). +Finally, the output of the unit is masked by the output gate \(\vctr{o}_t\).% +\sidenote{Note that the output gate \(\vctr{o}_t\) has its value computed from the new cell value \(\vctr{c}_t\) instead of \(\vctr{c}_{t-1}\) in contrast to the expression of \(\vctr{i}_t\) and \(\vctr{f}_t\).} + +It has been theorized~\parencite{lstm_vanishing} that the gates are what makes \textsc{lstm}s so powerful. +The multiplications allow the model to learn to control the flow of information in the unit, thus counteracting the vanishing gradient problem. +The basic building block of multiplicative gates has been reused for other \textsc{rnn} cell designs such as gated recurrent unit (\textsc{gru}, \cite{nmt_encdec}). +Furthermore, random cell designs using multiplicative gates can be shown to perform as well as \textsc{lstm}~\parencite{lstm_odyssey}. +However, standard practice is to always use \textsc{lstm} or \textsc{gru} for recurrent neural networks. + +\subsubsection{\textsc{elm}o} +\label{sec:context:elmo} +Recurrent neural networks with \textsc{lstm} cells were widely used for language modeling, both at the character-level~\parencite{charrnn} and at the word-level~\parencite{lm_limits}. +The first language model to become widely used for extracting contextual word embeddings was \textsc{elm}o (Embeddings from Language Model, \citex{elmo}) which uses several \textsc{lstm} layers. + +The peculiarity of the word embeddings extracted by \textsc{elm}o is that they are contextualized (see Section~\ref{sec:context:language model}). +Static word embeddings models like word2vec (Section~\ref{sec:context:word2vec}) map each word to a unique vector. +However, this fares poorly with polysemic words and homographs whose meaning depends on the context in which they are used. +\begin{marginparagraph} + Before \textsc{elm}o, \textcite{cove} already trained contextualized word representations using an \textsc{nmt} task. +\end{marginparagraph} +Contextualized word embeddings provide an answer to this problem. +Given a sentence, \textsc{elm}o proposes to use the hidden states \(\vctr{h}_t\) as a representation of each constituting word \(w_t\). +These representations are hence a function of the whole sentence.% +\sidenote{ + In order to encode both the left and right context of a word, \textsc{elm}o uses bidirectional \textsc{lstm}, meaning that each layer contains two \textsc{lstm}, one running from left-to-right and one right-to-left.} +Thus words are mapped to different vectors in different contexts. + +\subsection{Attention Mechanism} +\label{sec:context:attention} +To obtain a vector representation of a sentence from an \textsc{rnn}, two straightforward methods are to use the last hidden state \(\vctr{h}_m\) or use a pooling layer similar to the one used in \textsc{cnn}, such as max-over-time pooling. 
+However, both of these approaches present shortcomings: the last hidden state tends to encode little information about the beginning of the sentence, while pooling is too indiscriminate and influenced by unimportant words. +Using an attention mechanism is a way to avoid these shortcomings. +Furthermore, an attention mechanism is parametrized by a \emph{query} which allows us to select the piece of information we want to extract from the sentence. + +The concept of attention first appeared in neural machine translation (\textsc{nmt}) under the name ``alignment''~\parencitex{attention} before becoming ubiquitous in \textsc{nlp}. +The same principle was also presented under the name \emph{memory network}~\parencite{memory_networks, memory_networks_end-to-end}. +It is also the building block of transformers, which are presented \hyperref[sec:context:transformers]{next}. +With this in mind, we use the vocabulary of memory networks to describe the attention mechanism. + +\begin{figure}[ht!] + \centering + \input{mainmatter/context/attention.tex} + \scaption[Schema of an attention mechanism.]{ + Schema of an attention mechanism. + The attention scores are obtained by an inner product between the query and the memory. + The output is obtained as a sum of the memory weighted by the softmax of the attention scores. + \label{fig:context:attention} + } +\end{figure} + +\subsubsection{Attention as a Mechanism for \textsc{rnn}} +The principle of an attention layer on top of an \textsc{rnn} is illustrated by Figure~\ref{fig:context:attention}. +The layer takes three inputs: a query \(\vctr{q}\in\symbb{R}^d\), memory keys \(\mtrx{K}\in\symbb{R}^{\ell\times d}\) and memory values \(\mtrx{V}\in\symbb{R}^{\ell\times d'}\). +Originally, more often than not, \(\mtrx{K}=\mtrx{V}\). +In the model of Figure~\ref{fig:context:attention}, the memory corresponds to the hidden states of the \textsc{rnn}, which was the most common architecture when attention was introduced in 2014. +First, attention weights are computed from the query \(\vctr{q}\) and keys \(\mtrx{K}\), then these weights are used to compute the output \(\vctr{o}\in\symbb{R}^{d'}\) as a convex combination of the values \(\mtrx{V}\)\,: +\begin{marginparagraph}[-11mm] + Where \(\softmax\) is a smooth version of the \(\argmax\) function. + It can also be seen as a multi-dimensional sigmoid, defined as: + \begin{equation*} + \softmax(\vctr{x})_i = \frac{\exp x_i}{\sum_j \exp x_j} + \end{equation*} +\end{marginparagraph} +\begin{equation} + \vctr{o} = \softmax(\mtrx{K}\vctr{q})\mtrx{V}. +\end{equation} + +In \textsc{nmt}, the memory is built from the hidden states of an \textsc{rnn} running on the sentence to be translated (meaning \(\ell=m\)), while the query is the state of the translated sentence (``what was already translated''), the attention is then recomputed for each output position. +In other words, a new representation of the source sentence is recomputed for each word in the target sentence. +The attention weights---that is, the output of the softmax---can provide an interpretation of what the model is focusing on when making a prediction. +In the case of \textsc{nmt}, the attention for producing a translated word usually focuses on the corresponding word or group of words in the source sentence. 
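To make this computation concrete, the following NumPy sketch implements the attention layer of Figure~\ref{fig:context:attention} for a single query, without batching; the variable names are illustrative and this is not the implementation of the cited works.

import numpy as np

def softmax(x):
    # Numerically stable softmax over a vector.
    z = np.exp(x - x.max())
    return z / z.sum()

def attention(q, K, V):
    # q: query of shape (d,); K: memory keys of shape (l, d); V: memory values of shape (l, d').
    # Returns a convex combination of the rows of V weighted by softmax(K q).
    weights = softmax(K @ q)   # attention weights, shape (l,)
    return weights @ V         # output o, shape (d',)

# Toy usage: a memory of l = 4 hidden states of dimension d = 3, with K = V as was common originally.
rng = np.random.default_rng(0)
K = V = rng.normal(size=(4, 3))
q = rng.normal(size=3)
o = attention(q, K, V)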
+ +\subsubsection{Attention as a Standalone Model} +\label{sec:context:attention lm} +Since the attention mechanism produces a fixed-size representation (\(\vctr{o}\)) from a variable length sequence (\(\mtrx{K}\), \(\mtrx{V}\)\,), it can actually be used by itself without an \textsc{rnn}. +This was already mentioned in \textcitex{memory_networks_end-to-end}[-10mm] and used for language modeling. +We now succinctly present their approach. +As shown Figure~\ref{fig:context:memory network lm}, this is a causal language model (Section~\ref{sec:context:language model}), at each step \(P(w_t\mid w_1,\dotsc,w_{t-1})\) is modeled. +While the previous words constitute the memory of the attention mechanism, there is no natural value for the query. +As such, for the first layer, it is simply taken to be a constant vector \(q^{(1)}_i = 0.1\) for all \(i=1,\dotsc, d\). +When several attention layers are stacked, the output \(o^{(l)}\) of a layer \(l\) is used as the query \(q^{(l+1)}\) for the layer \(l+1\). +Furthermore, residual connections with linear layers and modified ReLU non-linearities% +\sidenote[][-35.5mm]{ + While the standard ReLU activation~\parencite{relu} is defined as \(\ReLU(x)=\max(0, x)\). + The non-linearity used in this model is \(\ReLU_{\halfCircleScript}\), which applies the ReLU activation to half of the units in the layer.} +are introduced between layers thus: \(\vctr{q}^{(l+1)} = \ReLU_{\halfCircleScript}(\mtrx{W}^{(l)} \vctr{q}^{(l)} + \vctr{o}^{(l)})\) where the matrices \(\mtrx{W}^{(l)}\in\symbb{R}^{d\times d}\) are parameters of the model. +As usual, the next word prediction \(\hat{w}_i\) is made using a softmax layer. + +\begin{marginfigure}[-30mm] + \centering + \input{mainmatter/context/memory network lm.tex} + \scaption[Schema of a memory network language model with two layers.]{ + Schema of a memory network language model with two layers. + Each red block corresponds to an attention mechanism as illustrated by Figure~\ref{fig:context:attention}. + } + \label{fig:context:memory network lm} +\end{marginfigure} + +\paragraph{Temporal Encoding} +The attention mechanism as described above is invariant to a permutation of the memory. +This is not a problem when an \textsc{rnn} is run on the sentence, as it can encode the relative positions of each token. +However, in the \textsc{rnn}-less approach of \textcite{memory_networks_end-to-end} this information is lost, which is quite damaging for language modeling. +Indeed, this would mean that shuffling the words in a sentence---like inverting the subject and object of a verb---does not change its meaning. +In order to solve this problem, temporal encoding is introduced. +When predicting \(w_i\), each word embedding \(\vctr{x}_j\) in the memory is summed with a relative position embedding \(\vctr{e}_{i-j}\). +These position embeddings are trained through back-propagation like any other parameters. + +\bigskip + +Attention mechanisms form the basis of current state-of-the-art approaches in \textsc{nlp}. +One of the explanations behind their success is that, in a sense, they are more shallow than \textsc{rnn}. +Indeed, when computing \(\frac{\partial \hat{w}_i}{\partial \vctr{x}_j}\) for the language model of \textcite{memory_networks_end-to-end}, one can see that part of the gradient goes through few non-linearities. +In contrast, the information from \(\vctr{x}_j\) to \(\hat{w}_i\) must go through the composition of at least \(i-j\) non-linearities in an \textsc{rnn}, which may cause the gradient to vanish. 
+However, an attention mechanism has linear complexity in the length of the sequence for a total of \(\Theta(m\times d^2)\) operations at each step. +When \(m\) is large, this can be prohibitive compared to \textsc{rnn}s, which have a \(\Theta(d^2)\) complexity at each step. +On the other hand, an attention layer can easily be parallelized while an \textsc{rnn} always necessitates \(\Omega(m)\) sequential operations. + +\subsection{Transformers} +\label{sec:context:transformers} +Transformers~\parencitex{transformers} were originally introduced for \textsc{nmt}. +Compared to the memory network language model presented above, they introduce several slight architectural modifications which make them the current state of the art for most \textsc{nlp} tasks. +For conciseness, we present the concept of transformers as used by \textsc{bert} (Bidirectional Encoder Representations from Transformers, \citex{bert}). +\textsc{bert} is a language model used to extract contextualized embeddings, similarly to \textsc{elm}o, but using attention layers in place of \textsc{lstm} layers. + +\subsubsection{Transformer Attention} +\label{sec:context:transformer attention} +The attention layers used by transformers are slightly modified. +\marginpar{Note that in contrast to the classical attention mechanism presented in Section~\ref{sec:context:attention}, transformers have \(\mtrx{K}\neq\mtrx{V}\).} +First, it is often desirable that all activations in a neural network follow a standard normal distribution \(\normalDistribution(0, 1)\). +In order to achieve this, transformers use scaled attention: +\begin{equation} + \operatorname{Attention}(\vctr{q}, \mtrx{K}, \mtrx{V}) = \softmax\left(\frac{\mtrx{K}\vctr{q}}{\sqrt{d}}\right)\mtrx{V}. +\end{equation} +This ensures that if \(\mtrx{K}\) and \(\vctr{q}\) follow a standard normal distribution, so does the input of the softmax. + +Second, multi-head attention is used: each layer actually applies \(h=8\) attentions in parallel. +To ensure each individual attention captures a different part of the semantics, its input is projected by different matrices, one for each attention head: +\begin{equation*} + \operatorname{MultiHeadAttention}(\vctr{q}, \mtrx{K}, \mtrx{V}) = + \begin{bmatrix} + \operatorname{head}_1(\vctr{q}, \mtrx{K}, \mtrx{V}) \\ + \operatorname{head}_2(\vctr{q}, \mtrx{K}, \mtrx{V}) \\ + \vdots\\ + \operatorname{head}_h(\vctr{q}, \mtrx{K}, \mtrx{V}) \\ + \end{bmatrix} \mtrx{W}^{(o)} +\end{equation*} +\begin{equation*} + \operatorname{head}_i(\vctr{q}, \mtrx{K}, \mtrx{V}) = \operatorname{Attention}(\vctr{q}\mtrx{W}_i^{(q)}, \mtrx{K}\mtrx{W}_i^{(k)}, \mtrx{V}\mtrx{W}_i^{(v)}). +\end{equation*} + +Lastly, on top of each attention layer is a linear layer with ReLU activation and a linear layer followed by layer normalization~\parencite{layernorm}. +These linear layers are identical along the sequence length, akin to a convolution with kernel size 1. +While the query of each layer is the output of the preceding layer, similarly to the model of \textcite{memory_networks_end-to-end}, the initial query is now the current word itself \(\vctr{x}_t\). +This architecture is illustrated in Figure~\ref{fig:context:bert}. + +\Textcite{bert} introduce two \textsc{bert} architectures dubbed \bertArch{small} and \bertArch{large}. +As their names imply, \bertArch{small} has fewer parameters than \bertArch{large}; in particular, \bertArch{small} is composed of 12 layers while \bertArch{large} is composed of 24 layers.
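As a rough illustration of the scaled and multi-head attention equations above, the following NumPy sketch handles a single query without batching; the lists Wq, Wk, Wv and the matrix Wo are hypothetical containers for the per-head and output projections, not the notation of the original article.

import numpy as np

def softmax(x):
    z = np.exp(x - x.max())
    return z / z.sum()

def scaled_attention(q, K, V):
    # softmax(K q / sqrt(d)) V, where d is the dimension of the query and keys.
    d = q.shape[0]
    return softmax(K @ q / np.sqrt(d)) @ V

def multi_head_attention(q, K, V, Wq, Wk, Wv, Wo):
    # Each head projects the query, keys and values with its own matrices;
    # the head outputs are concatenated and mixed by the output matrix Wo.
    heads = [scaled_attention(q @ Wq_i, K @ Wk_i, V @ Wv_i)
             for Wq_i, Wk_i, Wv_i in zip(Wq, Wk, Wv)]
    return np.concatenate(heads) @ Wo

# Toy usage: h = 2 heads, model dimension d = 4, sequence length l = 5.
rng = np.random.default_rng(0)
l, d, h = 5, 4, 2
q, K, V = rng.normal(size=d), rng.normal(size=(l, d)), rng.normal(size=(l, d))
Wq = [rng.normal(size=(d, d // h)) for _ in range(h)]
Wk = [rng.normal(size=(d, d // h)) for _ in range(h)]
Wv = [rng.normal(size=(d, d // h)) for _ in range(h)]
Wo = rng.normal(size=(d, d))  # maps the h * (d // h) concatenated features back to d
out = multi_head_attention(q, K, V, Wq, Wk, Wv, Wo)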
+ +\begin{marginfigure} + \centering + \input{mainmatter/context/bert.tex} + \scaption[Schema of \textsc{bert}, a transformer masked language model.]{ + Schema of \textsc{bert}, a transformer masked language model. + The schema is focused on the prediction for a single position \(t\); this is repeated for the whole sentence \(t=1, \dotsc, m\). + The model presented is the \bertArch{small} variant containing only 12 layers. + The input vectors \(\vctr{\tilde{x}}_t\) are obtained from the corrupted sentence \(\vctr{\tilde{w}}\) using an embedding layer. + To obtain \(\hat{w}_t\) from the last \textsc{bert} layer output, a linear layer with softmax over the vocabulary is used. + \label{fig:context:bert} + } +\end{marginfigure} + +\subsubsection{Masked Language Model} +\label{sec:context:mlm} +While some transformer models such as \textsc{gpt} (Generative Pre-Training, \cite{gpt}) are causal language models, \textsc{bert} is a \emph{masked} language model (\textsc{mlm}). +Instead of following Equation~\ref{eq:context:causal lm}, the following approximation is used: +\begin{equation} + P(\vctr{w}) \propto \prod_{t\in C} P(w_t \mid \tilde{\vctr{w}}) +\end{equation} +where \(C\) is a random set of indices, 15\% of tokens being uniformly selected to be part of \(C\), and \(\tilde{\vctr{w}}\) is a corrupted sequence defined as follows: +\begin{equation*} + \tilde{w}_t = \left\{\begin{array}{@{}ll@{}} + w_t & \text{if } t\not\in C \\ + \left.\begin{array}{@{}ll@{}} + \text{\blanktag{} token} & \text{with probability } 80\% \\ + \text{random token} & \text{with probability } 10\% \\ + w_t & \text{with probability } 10\% \\ + \end{array}\right\} & \text{if } t\in C\\ + \end{array}\right. +\end{equation*} +The masked tokens \blanktag{} make up the majority of the set \(C\) of tokens predicted by the model, thus the name ``masked language model''. +The main advantage of this approach compared to causal language models is that the probability distribution at a given position is parametrized by the whole sentence, including both the left and right context of a token. + +\subsubsection{Transfer Learning} +\label{sec:context:transfer learning} +The main purpose of \textsc{bert} is to be used on a \emph{downstream task}, transferring the knowledge gained on masked language modeling to a different problem. +As with \textsc{elm}o, the hidden states of the topmost layer, just before the linear and softmax layers, can be used as contextualized word representations. +Furthermore, the first token, usually called ``beginning of sentence'' but dubbed \textsc{cls} in \textsc{bert}, can be used as a representation of the whole sentence.% +\sidenote{ + This is by virtue of an additional \emph{next sentence prediction} loss with which \textsc{bert} is trained. + We do not detail this task here as it is not essential to \textsc{bert}'s training. + Furthermore, the embedding of the \textsc{cls} token is considered a poor representation of the sentence and is rarely used~\parencite{xlm, xlnet}.} +In contrast with \textsc{elm}o, \textsc{bert} is usually fully fine-tuned on the downstream task. +In the original article~\parencite{bert}, this was shown to outperform previous models on a wide variety of tasks, from question answering to textual entailment. + +\bigskip + +In this section, we presented several \textsc{nlp} models which allow us to get a distributed representation for words, sentences and words contextualized in sentences.
+These representations can then be used on a downstream task, such as relation extraction, as we do from Chapter~\ref{chap:relation extraction} onward. +We now focus on the other kind of data handled in this thesis: knowledge bases. diff --git a/mainmatter/context/sparql.tex b/mainmatter/context/sparql.tex @@ -0,0 +1,7 @@ +\begin{tabular}{@{}l l@{}} + \multicolumn{2}{@{}l}{\textsc{select} \texttt{?capital}}\\ + \multicolumn{2}{@{}l}{\textsc{where} \texttt{\{}}\\ + \hspace{2mm} & \texttt{?capital} \textsl{capital of} \texttt{?country.}\\ + & \texttt{?country} \textsl{part of} \texttt{Asia.}\\ + \texttt{\}} & \\ +\end{tabular} diff --git a/mainmatter/context/transe.tex b/mainmatter/context/transe.tex @@ -0,0 +1,28 @@ +\begin{algorithmic} + \Function{TransE}{} + \FunctionInputs{} \(\kbSet\) knowledge base + \FunctionInputs*{} \(\gamma\) margin + \FunctionInputs*{} \(d\) embedding dimension + \FunctionInputs*{} \(b\) batch size + \FunctionOutputs{} \(\mtrx{U}\) entity embeddings + \FunctionOutputs*{} \(\mtrx{V}\) relation embeddings + \State + \LComment{Initialization} + \State \(\mtrx{U} \gets \uniformDistribution_{|\entitySet|\times d}\left(-\frac{6}{\sqrt{d}}, \frac{6}{\sqrt{d}}\right)\) + \State \(\mtrx{V} \gets \uniformDistribution_{|\relationSet|\times d}\left(-\frac{6}{\sqrt{d}}, \frac{6}{\sqrt{d}}\right)\) + \State \(\forall r\in\relationSet: \vctr{v}_r \gets \vctr{v}_r / \|\vctr{v}_r\|_2\) + \LComment{Training} + \Loop + \State \(\forall e\in\entitySet: \vctr{u}_e \gets \vctr{u}_e / \|\vctr{u}_e\|_2\) + \State \(B \gets \emptyset\) + \For{\(i=1,\dotsc,b\)} + \State Sample \((e_1, r, e_2) \sim \uniformDistribution(\kbSet)\) + \State Sample \((e_1', e_2') \sim N(e_1, e_2)\) + \State \(B \gets B \cup \{(e_1, r, e_2, e_1', e_2')\}\) + \EndFor + \State Update \(\mtrx{U}\) and \(\mtrx{V}\) w.r.t. + \State \hspace{7mm}\(\displaystyle\nabla\sum_{\mathrlap{\hspace{-2mm}(e_1, r, e_2, e_1', e_2')\in B}}\loss{te}(e_1, r, e_2, e_1', e_2')\) + \EndLoop + \State \Output \(\mtrx{U}, \mtrx{V}\) + \EndFunction +\end{algorithmic} diff --git a/mainmatter/context/word.tex b/mainmatter/context/word.tex @@ -0,0 +1,183 @@ +\section{Distributed Representation of Words} +\label{sec:context:word} +Natural language processing (\textsc{nlp}) deals with the automatic manipulation of natural language by algorithms. +Nowadays, a large pan of \textsc{nlp} concerns itself with the question of how to obtain good distributed representations from textual inputs. +What constitutes a good representation may vary, but it is usually measured by performance on a task of interest. +Natural language inputs present themselves as tokens or sequences of tokens, usually in the form of words stringed together into sentences. +The goal is then to map these sequences of symbolic units to distributed representations. +This section and the next present several methods designed to achieve this goal which have become ubiquitous in \textsc{nlp} research. +We first describe how to obtain good representations of words---or of smaller semantic units in Section~\ref{sec:context:bpe}---before studying how to use these representations to process whole sentences in Section~\ref{sec:context:sentence}. + +Given a vocabulary, that is a set of words \(V=\{\text{a}, \text{aardvark}, \text{aback}, \dotsc\}\), our goal is to map each word \(w\in V\) to an embedding \(u_w\in\symbb{R}^d\) where \(d\) is a hyperparameter. +An example of an embedding space is given in Figure~\ref{fig:context:word2vec pca}. 
+\begin{marginparagraph}[-1cm] + In contrast, a symbolic representation of words would simply map each word to an index \(V\to\{1, \dotsc, |V|\}\). +\end{marginparagraph} +One of the early methods to embed words like this is latent semantic analysis (\textsc{lsa}, \citex{lsa}). +Interestingly, \textsc{lsa} was popularized by the information retrieval field under the name latent semantic indexing (\textsc{lsi}). +The basis of \textsc{lsa} is a document--term matrix indicating how many times a word appears in a document. +A naive approach would be to take the rows of this matrix; we would obtain a vector representation of each word, the dimension \(d\) of these embeddings would be the number of documents. +The similarity of two words is then evaluated by taking the cosine similarity of the associated vectors; in the simple case described above, this value would be high if the two words often appear together in the same documents and low otherwise. +We can already see that this representation is distributed since each document makes up a small fraction of the representation of the words it contains. +However, this approach is not practical, as either \(d\) is too large, or the representations obtained tend to be noisy (when the number of documents is relatively small). +So \textsc{lsa} goes one step further and builds a low-rank approximation of this matrix such that \(d\) can be chosen as small as we want. +This basic idea of modeling word co-occurrences forms the basis behind most word embedding techniques. + +\begin{marginfigure} + \centering + \renderEmbeddings{mainmatter/context/word2vec embeddings.xml} + \scaption[Word2vec embeddings \textsc{pca}.]{ + Selected word2vec embeddings of dimension \(d=300\), projected into two dimensions using \textsc{pca} (explained variance ratio \(\explainedvarx + \explainedvary\)). + The representations encode a strong separation between countries and capitals. + Furthermore, the relative position of each country with respect to its associated capital is somewhat similar.} + \label{fig:context:word2vec pca} +\end{marginfigure} + +In this section, we focus on the representation of words, yet most \textsc{nlp} tasks need to process longer chunks of text; this will be the focus of Section~\ref{sec:context:sentence}. +We center our overview of word representations on word2vec in Section~\ref{sec:context:word2vec}. +With the advent of deep learning, word2vec has been the most ubiquitous word embedding technique. +Additionally, it introduced negative sampling, a technique that we make use of in Chapter~\ref{chap:fitb}. +Section~\ref{sec:context:language model} introduces the notion of language model, which is central to several representation extraction techniques in \textsc{nlp}; we also present several alternatives to word2vec used before the transition to sentence-level approaches of Section~\ref{sec:context:transformers}. +Finally, while models presented in this section are focused on words, smaller semantic units can similarly be used. +This is especially needed for languages in which words have a complex internal structure, but it can also be applied to English. +Section~\ref{sec:context:bpe} will explore alternative levels at which we can apply methods from Sections~\ref{sec:context:word2vec} and~\ref{sec:context:language model}. + +\subsection{Word2vec} +\label{sec:context:word2vec} +Word2vec~\parencite{word2vec, word2vec_follow-up}\sidecite{word2vec_follow-up} is one of the first \textsc{nlp} models widely used for the representations it produces. 
+As its name implies, word2vec outputs word representations; however, its general framework can be used on other kinds of tokens. +Word2vec relies strongly on the distributional hypothesis: its goal is to model the context of a word to produce a representation of the word itself, a technique which was pioneered by \textcitex{nplm}. +Several variants of the word2vec model exist, but for the sake of conciseness, this section focuses on the skip-gram with negative sampling (\textsc{sgns}) approach. + +\subsubsection{Skip-gram} +\label{sec:context:skip-gram} +Given a word, the idea behind skip-gram is to model its context.% +\sidenote{The context of a word \(w\) is defined as all words appearing in a fixed-size window around \(w\) in the text. In the case of word2vec, this window is of size five in both directions.} +The probability of a word \(c\in V\) to appear in the context of a word \(w\in V\) is modeled by the following softmax: +\begin{marginparagraph} + Here, we omit the conditioning on the parameters. + More formally, \(P(c\mid w)\) should be written \(P(c\mid w; \mtrx{U}, \mtrx{U}')\). +\end{marginparagraph} +\begin{equation} + P(c\mid w) = \frac{\exp(\vctr{u}_w\transpose \vctr{u}'_c)}{\sum_{c'\in V} \exp(\vctr{u}_w\transpose \vctr{u}'_{c'})} + \label{eq:context:word2vec softmax} +\end{equation} +where \(V\) is the vocabulary, and \(\mtrx{U},\mtrx{U}'\in\symbb{R}^{V\times d}\) are the model parameters assigning a vector representation to all words in the vocabulary. +The rows of these parameters \(\vctr{u}_w\) and \(\vctr{u}'_w\) are what is of interest when word2vec is used for transfer learning. +Once the model has been trained, \(\vctr{u}_w\) can be used as a distributed representation for \(w\), capturing its associated semantics. +See Figure~\ref{fig:context:word2vec pca} for an example of extracted vectors. + +\subsubsection{Noise Contrastive Estimation} +\label{sec:context:nce} +Evaluating Equation~\ref{eq:context:word2vec softmax} is quite expensive since the normalization term involves all the words in the vocabulary. +Noise Contrastive Estimation (\textsc{nce}, \citex{nce}) is a training method that removes the need to compute the partition function of probabilistic models explicitly. +To achieve this, \textsc{nce} reframes the model as a binary classification problem by modeling the probability that a data point---in word2vec's case a word-context pair---comes from the observed dataset \(P(\rndm{D}=1\mid w,c)\). +\begin{marginparagraph} + We use \(\empP\) to refer to empirical distributions, whereas \(P\) denotes a modeled probability. + For example, \(\empP(c\mid w)\) is the actual frequency of the word \(c\in V\) in the context of \(w\in V\). + While \(P(c\mid w)\) is the probability word2vec assigns to a given pair \((c, w)\in V^2\). +\end{marginparagraph} +This probability is contrasted with \(k\) samples from a noise distribution following the unigram distribution \(\empP(\rndm{W})\), that is the empirical word frequency.% +\sidenote{Word2vec actually scales this distribution and uses various other tricks to lessen the effect of frequent words, refer to \textcite{word2vec_follow-up} for details.} +This translate to \(P(c\mid \rndm{D}=1, w) = \empP(c\mid w)\) and \(P(c\mid \rndm{D}=0, w) = \empP(\rndm{W}=c)\). +Using the prior \(P(\rndm{D}=0)=\frac{k}{k+1}\), the posterior can be expressed as: +\begin{equation} + P(\rndm{D}=1\mid w, c) = \frac{\empP(c\mid w)}{\empP(c\mid w) + k \empP(c)}. 
+ \label{eq:context:word2vec nce posterior} +\end{equation} + +Restating Equation~\ref{eq:context:word2vec softmax} as \(P(c\mid w) = \exp(\vctr{u}_w\transpose\vctr{u}'_c) \times \gamma_w\) and treating \(\gamma_w\) as another model parameter, \textsc{nce} allows us to train \(\mtrx{U}\) and \(\mtrx{U}'\) without computing the denominator of Equation~\ref{eq:context:word2vec softmax}. +Furthermore, estimating \(\gamma_w\) is not even necessary, since \textcite{nplm_nce} showed that using \(\gamma_w=1\) for all \(w\) works well in practice. +The final objective maximised by \textsc{nce} is the log-likelihood of the classification data: +\begin{equation} + \mathop{J_\textsc{nce}}(w,c) = \log P(\rndm{D}=1\mid w,c) + \sum_{i=1}^k \expectation_{c'_i\sim P(\rndm{W})} \left[ \log P(\rndm{D}=0\mid w, c'_i) \right]. + \label{eq:context:word2vec nce objective} +\end{equation} + +\Textcite{nce} showed that optimizing \(J_\textsc{nce}\) is equivalent to maximizing the log-likelihood using Equation~\ref{eq:context:word2vec softmax} under some reasonable assumptions. + +\subsubsection{Negative Sampling} +\label{sec:context:negative sampling} +However, \textsc{sgns} uses a different approximation of Equation~\ref{eq:context:word2vec softmax} called negative sampling. +The difference is mainly visible in the expression of the objective which simplifies to: +\begin{equation} + \mathop{J_\textsc{neg}}(w,c) = \log \sigmoid(\vctr{u}_w\transpose \vctr{u}'_c) + \sum_{i=1}^k \expectation_{c'_i\sim P(\rndm{W})} \left[ \log \sigmoid(-\vctr{u}_w\transpose \vctr{u}'_{c'_i}) \right]. + \label{eq:context:word2vec neg objective} +\end{equation} +This can be shown to be similar to \textsc{nce}, where Equation~\ref{eq:context:word2vec nce posterior} is instead replaced by the following posterior: +\begin{equation} + P(\rndm{D}=1\mid w, c) = \frac{\empP(c\mid w)}{\empP(c\mid w) + 1}. + \label{eq:context:word2vec neg posterior} +\end{equation} + +Optimizing the objective of Equation~\ref{eq:context:word2vec neg objective} is not equivalent to maximizing the log-likelihood of the language model. +But even though this is not an approximation of the softmax of Equation~\ref{eq:context:word2vec softmax}, this method has proven to be quite effective at producing good word representations. +\Textcitex{word2vec_pmi} explain the effectiveness of word2vec by showing that \textsc{sgns} can be interpreted as factoring the pointwise mutual information (\textsc{pmi}) matrix between words and contexts. +This led to the emergence of GloVe~\parencite{glove}, which produces word embeddings by directly factorizing the \textsc{pmi} matrix. + +The negative sampling algorithm is one of the main contributions of word2vec; it can be used outside \textsc{nlp} to optimize softmax over large domains. +In particular, we make use of negative sampling to approximate a softmax over a large number of entities in Chapter~\ref{chap:fitb}. +Furthermore, even though it was initially presented on words, the algorithm can be used on other kinds of tokens, as we will see in Section~\ref{sec:context:bpe}. + +\subsection{Language Modeling for Word Representation} +\label{sec:context:language model} +Word2vec is part of a large class of algorithms that seek to learn word representation from raw text. +More precisely, to obtain distributed representations of natural language inputs, most modern approaches rely on language models. +A language model specifies a probability distribution over sequences of tokens \(P(w_1, \dotsc, w_m)\). 
+The tokens \(\vctr{w}\) are usually words, but as we see in Section~\ref{sec:context:bpe}, they need not be. +This distribution is often decomposed into a product of conditional distributions on tokens. +The most common approach is the so-called \emph{causal} language model, which uses the following decomposition: +\begin{equation} + P(w_1, \dotsc, w_m) = \prod_{t=1}^m P(w_t\mid w_1, \dotsc, w_{t-1}). + \label{eq:context:causal lm} +\end{equation} +Modeling the tokens one by one not only enables the model to factorize the handling of local information but also makes it easy to sample from the model in order to generate new utterances. +However, most language models do not use an exact decomposition but either approximate \(P(\vctr{w})\) directly or use the decomposition of Equation~\ref{eq:context:causal lm} together with an approximation of the conditionals \(P(w_t\mid w_1, \dotsc, w_{t-1})\). +This is, for example, the case of word2vec, which conditions each word on its close neighbors instead of using the whole sentence. + +The use of language models is motivated by transfer learning, the idea that by solving a problem, we can get knowledge about a different but related problem. +To assign a probability to a sequence, language models extract intermediate latent factors, which have been shown to capture the semantic information contained in the sequence. +Using these latent factors as distributed representations for natural language inputs improved the performance of most \textsc{nlp} tasks. +The effectiveness of language models can be justified by the externalist approach and the distributional hypothesis presented in Section~\ref{sec:context:history}: a word is defined by the distribution of the other words with which it co-occurs. + +Since language models process sequences of words, we will delve into the details of these approaches in Section~\ref{sec:context:sentence}. +Apart from the neural probabilistic language model of \textcite{nplm}, a precursor to word embedding techniques was the \textsc{cnn}-based approach of \textcite{unified_nlp}; both of them learn distributed word representations by approximating \(P(\vctr{w})\) using a window somewhat similar to that of word2vec. + +All of these methods learn \emph{static} word embeddings, meaning that the vector assigned to a word such as ``bank'' is the same regardless of the context in which the word appears. +In the last few years, \emph{contextualized} word embeddings have grown in popularity; in these approaches, the word ``bank'' is assigned different embeddings in the phrases ``robbing a bank'' and ``bank of a river.'' +These methods were first based on recurrent neural networks (Section~\ref{sec:context:rnn}) such as \textsc{elm}o but are now primarily based on transformers (Section~\ref{sec:context:transformers}). +Among contextualized word embeddings built using transformers, some are based on the causal decomposition of Equation~\ref{eq:context:causal lm} (e.g.~\textsc{gpt}) while others are based on masked language models (e.g.~\textsc{bert}), a different approximation of \(P(\vctr{w})\) introduced in Section~\ref{sec:context:mlm}. + +\subsection{Subword Tokens} +\label{sec:context:bpe} +We defined word2vec and language models for a vocabulary \(V\) composed of words. +This may seem natural in the case of English and other somewhat analytic languages,% +\sidenote[][-21mm]{ + An analytic language is a language with a low ratio of morphemes to words. + This is in contrast to synthetic languages, where words have a complex inner structure.
+ Take, for example, the Nahuatl word ``Nimitztētlamaquiltīz'' (I-you-someone-something-give-\textsc{causative}-\textsc{future}) meaning ``I shall make somebody give something to you'' \parencite{nahuatl}. + For this kind of language, word-level approaches fail. + Older models preprocessed the text with a morphological segmentation algorithm, while modern approaches directly work on subword units. +} +but it cannot directly be applied to all languages. +Furthermore, language models that work at the word level tend to have difficulties working with rare words. +A first solution to this problem is to use character-level models, but these tend to have a hard time dealing with the resulting long sequences. + +Modern approaches work neither at the word level nor at the character level; instead, an intermediate subword vocabulary is used. +The standard method to build this vocabulary nowadays is to use the byte pair encoding algorithm (\textsc{bpe}, \cite{bpe}). +\textsc{bpe}, listed as Algorithm~\ref{alg:context:bpe}, consists in iteratively replacing the most common bigram \(c_1c_2\) in a corpus with a new token \(c_\text{new}\). +This new token can then itself appear in the most common bigram alongside another token, say \(c_\text{new}c_3\), which is in turn replaced with a new token \(c'_\text{new}\) representing a trigram in the original alphabet: \(c_1c_2c_3\). +This is repeated until the desired vocabulary size is reached. +In this way, \textsc{bpe} extracts tokens close to morphemes, the smallest linguistic units carrying meaning. +As an example, by using this algorithm, the word ``pretrained'' can be split into three parts: ``pre-,'' ``-train-'' and ``-ed.'' + +\begin{marginalgorithm} + \input{mainmatter/context/bpe.tex} + \scaption{The byte pair encoding algorithm.} + \label{alg:context:bpe} +\end{marginalgorithm} + +Word2vec can be applied both to words and to subwords extracted by \textsc{bpe} or other algorithms. +This is the case of fastText~\parencite{fasttext}, which uses the word2vec algorithm on fixed-size subwords. +All the models discussed in this section and the next have very loose requirements on the vocabulary \(V\). +However, they might work best using a smaller \(V\); this is especially the case of transformers, the current state-of-the-art approach introduced in Section~\ref{sec:context:transformers}.
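The merging loop described above is short enough to sketch in Python; the following toy implementation is illustrative only, it is neither the thesis's Algorithm~\ref{alg:context:bpe} nor the reference implementation of \cite{bpe}.

import collections

def bpe(corpus, merges):
    # corpus: list of token sequences (e.g. words split into characters).
    # merges: number of merge operations, which controls the vocabulary size.
    corpus = [list(tokens) for tokens in corpus]
    for _ in range(merges):
        # Count every adjacent pair (bigram) of tokens in the corpus.
        counts = collections.Counter(
            pair for tokens in corpus for pair in zip(tokens, tokens[1:]))
        if not counts:
            break
        best = max(counts, key=counts.get)  # most frequent bigram (c1, c2)
        new_token = "".join(best)           # the new token c_new
        # Replace every occurrence of the most frequent bigram by c_new.
        for tokens in corpus:
            i = 0
            while i < len(tokens) - 1:
                if (tokens[i], tokens[i + 1]) == best:
                    tokens[i:i + 2] = [new_token]
                else:
                    i += 1
    return corpus

# Toy usage: after a few merges, frequent substrings such as "train" emerge.
print(bpe([list("pretrained"), list("pretraining"), list("trained")], merges=6))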
diff --git a/mainmatter/context/word2vec embeddings.xml b/mainmatter/context/word2vec embeddings.xml @@ -0,0 +1,23 @@ +<!-- +import gensim.downloader +import sklearn.decomposition +from xml.etree import ElementTree +TARGET_WORDS = ["Paris","France","Madrid","Spain","Italy","Rome","Germany","Berlin"] +data = gensim.downloader.load('word2vec-google-news-300') +source = data[TARGET_WORDS] +pca = sklearn.decomposition.PCA(n_components=2, svd_solver='full') +target = pca.fit_transform(source) +root = ElementTree.Element("embeddings") +for word, vector in zip(TARGET_WORDS, target): + embedding = ElementTree.SubElement(root, "embedding") + ElementTree.SubElement(embedding, "x").text = str(vector[0]) + ElementTree.SubElement(embedding, "y").text = str(vector[1]) + ElementTree.SubElement(embedding, "label").text = str(word) +explained = ElementTree.SubElement(root, "explained") +ElementTree.SubElement(explained, "x").text = str(pca.explained_variance_ratio_[0]) +ElementTree.SubElement(explained, "y").text = str(pca.explained_variance_ratio_[1]) + +tree = ElementTree.ElementTree(root) +tree.write("word2vec embeddings.xml") +--> +<embeddings><embedding><x>1.0263773</x><y>0.23883666</y><label>Paris</label></embedding><embedding><x>-0.947096</x><y>0.05958702</y><label>France</label></embedding><embedding><x>0.93604654</x><y>-1.3953391</y><label>Madrid</label></embedding><embedding><x>-0.873475</x><y>-1.1731068</y><label>Spain</label></embedding><embedding><x>-1.005623</x><y>-0.33860308</y><label>Italy</label></embedding><embedding><x>1.3307737</x><y>-0.19644451</y><label>Rome</label></embedding><embedding><x>-1.2115865</x><y>1.060732</y><label>Germany</label></embedding><embedding><x>0.7445842</x><y>1.7443377</y><label>Berlin</label></embedding><explained><x>0.27626586</x><y>0.25357923</y></explained></embeddings> diff --git a/mainmatter/fitb/align.tex b/mainmatter/fitb/align.tex @@ -0,0 +1,12 @@ +\begin{tabular}[b]{c r r r r r r r} + \toprule + \multirow{2}{*}{Model} & \multicolumn{3}{c}{\bcubed} & \multicolumn{3}{c}{V-measure} & \multirow{2}{*}{\textsc{ari}} \\ + \cmidrule(lr){2-4}\cmidrule(lr){5-7} + & \fone & Prec. & Rec. & \fone & Hom. & Comp. & \\ + \midrule + \(\loss{ep}+\loss{s}+\loss{d}\) & 39.4 & 32.2 & 50.7 & 38.3 & 32.2 & 47.2 & 33.8 \\ + \loss{align} average & 37.6 & 30.3 & 49.7 & 39.4 & 33.1 & 48.8 & 20.3 \\ + \loss{align} maximum & 41.2 & 33.6 & 53.4 & 43.5 & 36.9 & 53.1 & 29.5 \\ + \loss{align} minimum & 34.5 & 26.5 & 49.3 & 35.9 & 29.6 & 45.7 & 15.3 \\ + \bottomrule +\end{tabular} diff --git a/mainmatter/fitb/chapter.tex b/mainmatter/fitb/chapter.tex @@ -0,0 +1,20 @@ +\chapter{Regularizing Discriminative Unsupervised Relation Extraction Models} +\label{chap:fitb} +\begin{epigraph} + {Samuel Beckett} + {\citetitle{molloy}} + {\cite*{molloy}} + And once again I am I will not say alone, no, that's not like me, but, how shall I say, I don't know, restored to myself, no, I never left myself, free, yes, I don't know what that means but it's the word I mean to use, free to do what, to do nothing, to know, but what, the laws of the mind perhaps, of my mind, that for example water rises in proportion as it drowns you and that you would do better, at least no worse, to obliterate texts than to blacken margins, to fill in the holes of words till all is blank and flat and the whole ghastly business looks like what is, senseless, speechless, issueless misery. +\end{epigraph} +\begin{epigraph} + {Bill Watterson} + {\citetitle{calvinandhobbes}} + {\cite*{calvinandhobbes}} + Careful! 
We don't want to learn anything from this. +\end{epigraph} +\input{mainmatter/fitb/introduction.tex} +\input{mainmatter/fitb/model.tex} +\input{mainmatter/fitb/related works.tex} +\input{mainmatter/fitb/experiments.tex} +\input{mainmatter/fitb/variants.tex} +\input{mainmatter/fitb/conclusion.tex} diff --git a/mainmatter/fitb/conclusion.tex b/mainmatter/fitb/conclusion.tex @@ -0,0 +1,17 @@ +\section{Conclusion} +\label{sec:fitb:conclusion} +In this chapter, we showed that discriminative relation extraction models can be trained efficiently on unlabeled datasets. +Unsupervised relation extraction models tend to produce impure clusters by enforcing a uniformity constraint at the level of a single sample. +We proposed two losses (named RelDist) to effectively train expressive relation extraction models by enforcing the distribution over relations to be uniform---note that other target distributions could be used. +In particular, we were able to successfully train a deep neural network classifier that had so far only performed well in a supervised setting. +We demonstrated the effectiveness of our RelDist losses on three datasets and showcased their effect on cluster purity. + +While forcing a uniform distribution with the distance loss \loss{d} might be meaningful with a low number of predicted clusters, it might not generalize to larger numbers of relations. +Preliminary experiments seem to indicate that this can be addressed by replacing the uniform distribution in Equation~\ref{eq:fitb:uniformity} with the empirical distribution of the relations in the validation set, or any other appropriate law if no validation set is available.% +\sidenote{In practice, Zipf's law (described in the margin of Section~\ref{sec:relation extraction:oie}) seems to fit the observed empirical distribution quite well.} +This would allow us to avoid the \hypothesis{uniform} assumption. + +All models presented in this chapter make extensive independence assumptions. +As suggested in Section~\ref{sec:fitb:variants} and shown in subsequent work \parencite{selfore,mtb}, this could be solved with sentence representations pre-trained with a language modeling task. +Furthermore, the fill-in-the-blank model is inherently sentence-level. +In the next chapter, we study how to build an unsupervised aggregate relation extraction model using a pre-trained \bertcoder.
diff --git a/mainmatter/fitb/confusion lda.xml b/mainmatter/fitb/confusion lda.xml @@ -0,0 +1 @@ +<confusion><gold><relation><identifier>131</identifier><frequency>0.16362139745088683</frequency><surfaceform>located in</surfaceform></relation><clusters><recall>0.15163680048052858</recall><recall>0.22847131845029534</recall><recall>0.11215336870557613</recall><recall>0.16720392431674844</recall><recall>0.13120432475723295</recall><recall>0.04590049053959355</recall><recall>0.012924216638302132</recall><recall>0.030433476824506957</recall><recall>0.11936129742716989</recall><recall>0.0007107818600460506</recall></clusters></gold><gold><relation><identifier>31</identifier><frequency>0.1504288717736312</frequency><surfaceform>instance of</surfaceform></relation><clusters><recall>0.056102415732170244</recall><recall>0.008027061421159682</recall><recall>0.08727525859537302</recall><recall>0.07213478502050273</recall><recall>0.059963671564841906</recall><recall>0.42508619845767304</recall><recall>0.14261629993800237</recall><recall>0.057287984424455346</recall><recall>0.030400591696668443</recall><recall>0.06110573314915324</recall></clusters></gold><gold><relation><identifier>17</identifier><frequency>0.09616322293467831</frequency><surfaceform>in country</surfaceform></relation><clusters><recall>0.05878037128037128</recall><recall>0.31053781053781054</recall><recall>0.06067431067431067</recall><recall>0.20034807534807536</recall><recall>0.04635885885885886</recall><recall>0.04001160251160251</recall><recall>0.03194103194103194</recall><recall>0.18859200109200108</recall><recall>0.05543611793611793</recall><recall>0.00731981981981982</recall></clusters></gold><gold><relation><identifier>31</identifier><reversed /><frequency>0.07373396318413698</frequency><surfaceform>instance of</surfaceform></relation><clusters><recall>0.0406655988610073</recall><recall>0.009031856202171205</recall><recall>0.04191137213027229</recall><recall>0.06885121907812779</recall><recall>0.08609183128670582</recall><recall>0.12677967609895</recall><recall>0.1240211781455775</recall><recall>0.09131962982737142</recall><recall>0.34554636056237764</recall><recall>0.06578127780743905</recall></clusters></gold><gold><relation><identifier>47</identifier><frequency>0.04465273445077674</frequency><surfaceform>shares border</surfaceform></relation><clusters><recall>0.15248162100874146</recall><recall>0.11191982736549505</recall><recall>0.12545261694890458</recall><recall>0.12252660839032954</recall><recall>0.0963022566841008</recall><recall>0.06144617973007571</recall><recall>0.051314875096009654</recall><recall>0.10968874583958158</recall><recall>0.08668300354778538</recall><recall>0.08218426538897626</recall></clusters></gold><gold><relation><identifier>47</identifier><reversed /><frequency>0.044644870053949764</frequency><surfaceform>shares border</surfaceform></relation><clusters><recall>0.15338572267920095</recall><recall>0.10862220916568743</recall><recall>0.12643213866039954</recall><recall>0.12220916568742655</recall><recall>0.0959532902467685</recall><recall>0.06103113983548766</recall><recall>0.05152027027027027</recall><recall>0.10840188014101057</recall><recall>0.08838866039952996</recall><recall>0.08405552291421857</recall></clusters></gold><gold><relation><identifier>131</identifier><reversed /><frequency>0.044184802839571546</frequency><surfaceform>located 
in</surfaceform></relation><clusters><recall>0.14600859973826882</recall><recall>0.12017199476537671</recall><recall>0.10061693774537296</recall><recall>0.21364741073097776</recall><recall>0.1229388670779585</recall><recall>0.05604785941297439</recall><recall>0.022022808001495607</recall><recall>0.05126191811553561</recall><recall>0.16380631893811928</recall><recall>0.003477285473920359</recall></clusters></gold><gold><relation><identifier>17</identifier><reversed /><frequency>0.035592949306098055</frequency><surfaceform>in country</surfaceform></relation><clusters><recall>0.09598788101358796</recall><recall>0.18068307014322438</recall><recall>0.056096217407271394</recall><recall>0.41383584282041863</recall><recall>0.06495593095850165</recall><recall>0.025936467131839884</recall><recall>0.01748990084465663</recall><recall>0.06394601542416452</recall><recall>0.05334190231362468</recall><recall>0.027726771942710247</recall></clusters></gold><gold><relation><identifier>161</identifier><frequency>0.026758610203792735</frequency><surfaceform>cast member of</surfaceform></relation><clusters><recall>0.0071463474224285365</recall><recall>0.0031761544099682387</recall><recall>0.017957488394820423</recall><recall>0.0004886391399951136</recall><recall>0.0006718788174932812</recall><recall>0.0006718788174932812</recall><recall>0.08508429025164915</recall><recall>0.009406303444905936</recall><recall>0.0026875152699731247</recall><recall>0.8727095040312729</recall></clusters></gold><gold><relation><identifier>36</identifier><reversed /><frequency>0.01588083865927763</frequency><surfaceform>capital of</surfaceform></relation><clusters><recall>0.262888182843087</recall><recall>0.18694270779952854</recall><recall>0.11489187250179358</recall><recall>0.08486215025110178</recall><recall>0.10331044378394999</recall><recall>0.02142051860202931</recall><recall>0.024495234190837347</recall><recall>0.05995695398175669</recall><recall>0.13979706877113868</recall><recall>0.001434867274777083</recall></clusters></gold><gold><relation><identifier>57</identifier><frequency>0.01398682975678042</frequency><surfaceform>director of</surfaceform></relation><clusters><recall>0.010942463819272856</recall><recall>0.0018825744205200612</recall><recall>0.005765384162842687</recall><recall>0.0016472526179550535</recall><recall>0.005294740557712672</recall><recall>0.004235792446170138</recall><recall>0.09154018119778798</recall><recall>0.013413342746205436</recall><recall>0.002117896223085069</recall><recall>0.8631603718084481</recall></clusters></gold><gold><relation><identifier>40</identifier><frequency>0.012234379997168816</frequency><surfaceform>has child</surfaceform></relation><clusters><recall>0.05847639484978541</recall><recall>0.01059549356223176</recall><recall>0.13559549356223177</recall><recall>0.02052038626609442</recall><recall>0.07913090128755365</recall><recall>0.04694206008583691</recall><recall>0.10193133047210301</recall><recall>0.0895922746781116</recall><recall>0.09361587982832618</recall><recall>0.3635997854077253</recall></clusters></gold><gold><relation><identifier>40</identifier><reversed /><frequency>0.010457026314271783</frequency><surfaceform>has 
child</surfaceform></relation><clusters><recall>0.06511481597986789</recall><recall>0.013211701793016672</recall><recall>0.13321799307958476</recall><recall>0.02170493865995596</recall><recall>0.07832651777288456</recall><recall>0.026895250078641082</recall><recall>0.11151305441962882</recall><recall>0.09232463038691413</recall><recall>0.09877319911921988</recall><recall>0.35891789871028623</recall></clusters></gold><gold><relation><identifier>54</identifier><frequency>0.009276056057420583</frequency><surfaceform>member of</surfaceform></relation><clusters><recall>0.3013506402385546</recall><recall>0.010173653744957024</recall><recall>0.028942290826170846</recall><recall>0.16804069461497984</recall><recall>0.04876337484651815</recall><recall>0.01368181020873531</recall><recall>0.06156814593930889</recall><recall>0.10787581126118224</recall><recall>0.16348009121206805</recall><recall>0.096123487107525</recall></clusters></gold><gold><relation><identifier>36</identifier><frequency>0.008716373149900647</frequency><surfaceform>capital of</surfaceform></relation><clusters><recall>0.24767154533358676</recall><recall>0.1596654628397643</recall><recall>0.13970727998479376</recall><recall>0.08420452385478046</recall><recall>0.11822847367420643</recall><recall>0.023189507698156243</recall><recall>0.034784261547234366</recall><recall>0.05797376924539061</recall><recall>0.13229424063866185</recall><recall>0.0022809351834252043</recall></clusters></gold></confusion>+ \ No newline at end of file diff --git a/mainmatter/fitb/confusion pcnn.xml b/mainmatter/fitb/confusion pcnn.xml @@ -0,0 +1 @@ +<confusion><gold><relation><identifier>131</identifier><frequency>0.16362139745088683</frequency><surfaceform>located in</surfaceform></relation><clusters><recall>0.4227750525578136</recall><recall>0.426148763640004</recall><recall>0.00013014315747322054</recall><recall>0.0004204625087596356</recall><recall>0.07630393432776053</recall><recall>0.012633897287015717</recall><recall>0.007197917709480428</recall><recall>0.011382520772850135</recall><recall>0.042807087796576236</recall><recall>0.00020022024226649314</recall></clusters></gold><gold><relation><identifier>31</identifier><frequency>0.1504288717736312</frequency><surfaceform>instance of</surfaceform></relation><clusters><recall>0.009158246228477577</recall><recall>0.0029149762342422694</recall><recall>0.14495480699159224</recall><recall>0.5621335885750335</recall><recall>0.05216502246054449</recall><recall>0.014835923819053938</recall><recall>0.19320418973449788</recall><recall>0.0006526066196064782</recall><recall>0.006819739174887697</recall><recall>0.013160900162063977</recall></clusters></gold><gold><relation><identifier>17</identifier><frequency>0.09616322293467831</frequency><surfaceform>in country</surfaceform></relation><clusters><recall>0.1576064701064701</recall><recall>0.3152811902811903</recall><recall>0.13208094458094458</recall><recall>0.00029006279006279004</recall><recall>0.21254777504777506</recall><recall>0.08121758121758121</recall><recall>0.0440042315042315</recall><recall>0.008906633906633907</recall><recall>0.04775798525798526</recall><recall>0.0003071253071253071</recall></clusters></gold><gold><relation><identifier>31</identifier><reversed /><frequency>0.07373396318413698</frequency><surfaceform>instance 
of</surfaceform></relation><clusters><recall>0.008453461470012458</recall><recall>0.010945008008542445</recall><recall>0.18831197722014592</recall><recall>0.0034926143441893574</recall><recall>0.0641350774159103</recall><recall>0.04146645310553479</recall><recall>0.6546093610962804</recall><recall>0.0010455597081331198</recall><recall>0.008275493860117458</recall><recall>0.019264993771133655</recall></clusters></gold><gold><relation><identifier>47</identifier><frequency>0.04465273445077674</frequency><surfaceform>shares border</surfaceform></relation><clusters><recall>0.004645038586737866</recall><recall>0.005047364763541933</recall><recall>0.0</recall><recall>0.00021945064189312754</recall><recall>0.0009875278885190738</recall><recall>0.010935956987674189</recall><recall>0.07475951867159211</recall><recall>0.8948465674262097</recall><recall>0.008412274605903223</recall><recall>0.00014630042792875168</recall></clusters></gold><gold><relation><identifier>47</identifier><reversed /><frequency>0.044644870053949764</frequency><surfaceform>shares border</surfaceform></relation><clusters><recall>0.0047737955346650995</recall><recall>0.005030846063454759</recall><recall>0.0</recall><recall>0.0001836075205640423</recall><recall>0.0009180376028202115</recall><recall>0.009878084606345476</recall><recall>0.07527908343125735</recall><recall>0.8946460047003525</recall><recall>0.009143654524089306</recall><recall>0.00014688601645123384</recall></clusters></gold><gold><relation><identifier>131</identifier><reversed /><frequency>0.044184802839571546</frequency><surfaceform>located in</surfaceform></relation><clusters><recall>0.10592634137221911</recall><recall>0.03701626472237801</recall><recall>0.020975883342680874</recall><recall>0.00048607216302112545</recall><recall>0.027107870630024303</recall><recall>0.02134978500654328</recall><recall>0.05982426621798467</recall><recall>0.043634324172742565</recall><recall>0.68274443821275</recall><recall>0.0009347541596560105</recall></clusters></gold><gold><relation><identifier>17</identifier><reversed /><frequency>0.035592949306098055</frequency><surfaceform>in country</surfaceform></relation><clusters><recall>0.018270290121189865</recall><recall>0.14120455380095484</recall><recall>0.0</recall><recall>0.0004590525156077855</recall><recall>0.02988431876606684</recall><recall>0.12380646345941976</recall><recall>0.13340066103562248</recall><recall>0.042691883951524054</recall><recall>0.5092728608152772</recall><recall>0.0010099155343371283</recall></clusters></gold><gold><relation><identifier>161</identifier><frequency>0.026758610203792735</frequency><surfaceform>cast member of</surfaceform></relation><clusters><recall>0.0002443195699975568</recall><recall>0.0</recall><recall>0.0</recall><recall>0.0</recall><recall>6.10798924993892e-05</recall><recall>0.0</recall><recall>0.013620816027363793</recall><recall>0.0</recall><recall>0.0005497190324945028</recall><recall>0.9855240654776447</recall></clusters></gold><gold><relation><identifier>36</identifier><reversed /><frequency>0.01588083865927763</frequency><surfaceform>capital 
of</surfaceform></relation><clusters><recall>0.31884800655939327</recall><recall>0.2919954904171364</recall><recall>0.0</recall><recall>0.0005124525981346726</recall><recall>0.013221277031874552</recall><recall>0.009019165727170236</recall><recall>0.005841959618735267</recall><recall>0.244132417751358</recall><recall>0.11581428717843599</recall><recall>0.0006149431177616071</recall></clusters></gold><gold><relation><identifier>57</identifier><frequency>0.01398682975678042</frequency><surfaceform>director of</surfaceform></relation><clusters><recall>0.0</recall><recall>0.0</recall><recall>0.006118366866690199</recall><recall>0.0010589481115425344</recall><recall>0.00011766090128250382</recall><recall>0.0</recall><recall>0.0014119308153900459</recall><recall>0.0</recall><recall>0.0</recall><recall>0.9912930933050947</recall></clusters></gold><gold><relation><identifier>40</identifier><frequency>0.012234379997168816</frequency><surfaceform>has child</surfaceform></relation><clusters><recall>0.004694206008583691</recall><recall>0.00013412017167381974</recall><recall>0.0</recall><recall>0.0</recall><recall>0.00013412017167381974</recall><recall>0.0013412017167381974</recall><recall>0.0008047210300429185</recall><recall>0.011936695278969957</recall><recall>0.007778969957081545</recall><recall>0.973175965665236</recall></clusters></gold><gold><relation><identifier>40</identifier><reversed /><frequency>0.010457026314271783</frequency><surfaceform>has child</surfaceform></relation><clusters><recall>0.003932054105064486</recall><recall>0.0012582573136206354</recall><recall>0.0</recall><recall>0.00015728216420257942</recall><recall>0.00015728216420257942</recall><recall>0.0006291286568103177</recall><recall>0.0004718464926077383</recall><recall>0.009594212016357346</recall><recall>0.004089336269267065</recall><recall>0.9797106008178672</recall></clusters></gold><gold><relation><identifier>54</identifier><frequency>0.009276056057420583</frequency><surfaceform>member of</surfaceform></relation><clusters><recall>0.8068759866690054</recall><recall>0.00017540782318891423</recall><recall>0.0</recall><recall>0.0019294860550780565</recall><recall>0.0014032625855113139</recall><recall>0.009472022452201368</recall><recall>0.03490615681459393</recall><recall>0.004911419049289599</recall><recall>0.09822838098579197</recall><recall>0.04209787756533941</recall></clusters></gold><gold><relation><identifier>36</identifier><frequency>0.008716373149900647</frequency><surfaceform>capital of</surfaceform></relation><clusters><recall>0.12012925299372743</recall><recall>0.03497433947918647</recall><recall>0.0</recall><recall>0.0005702337958563011</recall><recall>0.0034214027751378064</recall><recall>0.01596654628397643</recall><recall>0.024139897357916745</recall><recall>0.35050370651967305</recall><recall>0.44858391940695685</recall><recall>0.0017107013875689032</recall></clusters></gold></confusion>+ \ No newline at end of file diff --git a/mainmatter/fitb/confusion regularized vae.xml b/mainmatter/fitb/confusion regularized vae.xml @@ -0,0 +1 @@ +<confusion><gold><relation><identifier>131</identifier><frequency>0.16362139745088683</frequency><surfaceform>located 
in</surfaceform></relation><clusters><recall>0.45699269196115727</recall><recall>0.1832715987586345</recall><recall>0.07916708379217138</recall><recall>0.004605065572129342</recall><recall>0.012333566923615978</recall><recall>0.17313044348783663</recall><recall>0.011762939233156472</recall><recall>0.07642406647312043</recall><recall>0.0004905395935529082</recall><recall>0.0018220042046250876</recall></clusters></gold><gold><relation><identifier>31</identifier><frequency>0.1504288717736312</frequency><surfaceform>instance of</surfaceform></relation><clusters><recall>0.0005764691806523891</recall><recall>0.006972014052795876</recall><recall>0.003295663429012715</recall><recall>0.7884140571465863</recall><recall>0.014738032826112966</recall><recall>0.01818597113303386</recall><recall>0.0032739098750258324</recall><recall>0.01812071047107321</recall><recall>0.13974483081173386</recall><recall>0.0066783410739729605</recall></clusters></gold><gold><relation><identifier>17</identifier><frequency>0.09616322293467831</frequency><surfaceform>in country</surfaceform></relation><clusters><recall>0.40194171444171445</recall><recall>0.007353944853944854</recall><recall>0.12295249795249795</recall><recall>0.0006654381654381654</recall><recall>0.017403767403767405</recall><recall>0.3734302484302484</recall><recall>0.014503139503139504</recall><recall>0.05041973791973792</recall><recall>0.0011602511602511602</recall><recall>0.01016926016926017</recall></clusters></gold><gold><relation><identifier>31</identifier><reversed /><frequency>0.07373396318413698</frequency><surfaceform>instance of</surfaceform></relation><clusters><recall>0.002135611318739989</recall><recall>0.003781811710268731</recall><recall>0.031789464317494215</recall><recall>0.08497953372486207</recall><recall>0.013036127424808685</recall><recall>0.006473571809930592</recall><recall>0.7165198433885033</recall><recall>0.002580530343477487</recall><recall>0.13572254849617368</recall><recall>0.0029809574657412353</recall></clusters></gold><gold><relation><identifier>47</identifier><frequency>0.04465273445077674</frequency><surfaceform>shares border</surfaceform></relation><clusters><recall>0.11404118357046195</recall><recall>0.1480194579569145</recall><recall>0.05804469478073224</recall><recall>0.0035112102702900406</recall><recall>0.02095753630079368</recall><recall>0.27186277019860283</recall><recall>0.022420540580081198</recall><recall>0.2983431476537069</recall><recall>0.007022420540580081</recall><recall>0.055777038147836584</recall></clusters></gold><gold><relation><identifier>47</identifier><reversed /><frequency>0.044644870053949764</frequency><surfaceform>shares border</surfaceform></relation><clusters><recall>0.1128084606345476</recall><recall>0.14817126909518213</recall><recall>0.05805669800235018</recall><recall>0.0036354289071680377</recall><recall>0.02008666274970623</recall><recall>0.27144535840188017</recall><recall>0.02287749706227967</recall><recall>0.2986559929494712</recall><recall>0.006756756756756757</recall><recall>0.05750587544065805</recall></clusters></gold><gold><relation><identifier>131</identifier><reversed /><frequency>0.044184802839571546</frequency><surfaceform>located 
in</surfaceform></relation><clusters><recall>0.09396148812862218</recall><recall>0.12106935875864648</recall><recall>0.12712656571321743</recall><recall>0.005645915124322303</recall><recall>0.011927463077210694</recall><recall>0.010057954757898672</recall><recall>0.007702374275565527</recall><recall>0.6162273322116283</recall><recall>0.002056459151243223</recall><recall>0.004225088801645167</recall></clusters></gold><gold><relation><identifier>17</identifier><reversed /><frequency>0.035592949306098055</frequency><surfaceform>in country</surfaceform></relation><clusters><recall>0.013082996694821887</recall><recall>0.004590525156077855</recall><recall>0.23774329783327214</recall><recall>0.003396988615497613</recall><recall>0.015470069775982372</recall><recall>0.05375504957767169</recall><recall>0.005233198677928755</recall><recall>0.6518086669114946</recall><recall>0.0012853470437017994</recall><recall>0.01363385971355123</recall></clusters></gold><gold><relation><identifier>161</identifier><frequency>0.026758610203792735</frequency><surfaceform>cast member of</surfaceform></relation><clusters><recall>0.0</recall><recall>0.0</recall><recall>0.001221597849987784</recall><recall>0.0</recall><recall>6.10798924993892e-05</recall><recall>0.0</recall><recall>0.0001221597849987784</recall><recall>0.0018934766674810653</recall><recall>0.9215734180307843</recall><recall>0.07512826777424872</recall></clusters></gold><gold><relation><identifier>36</identifier><reversed /><frequency>0.01588083865927763</frequency><surfaceform>capital of</surfaceform></relation><clusters><recall>0.5466844316900686</recall><recall>0.10710259301014656</recall><recall>0.0893717331146869</recall><recall>0.002254791431792559</recall><recall>0.015783540022547914</recall><recall>0.03679409654606949</recall><recall>0.005022035461719791</recall><recall>0.18735266987803628</recall><recall>0.0013323767551501487</recall><recall>0.008301732089781695</recall></clusters></gold><gold><relation><identifier>57</identifier><frequency>0.01398682975678042</frequency><surfaceform>director of</surfaceform></relation><clusters><recall>0.0</recall><recall>0.00035298270384751147</recall><recall>0.0007059654076950229</recall><recall>0.00011766090128250382</recall><recall>0.0</recall><recall>0.0</recall><recall>0.00023532180256500765</recall><recall>0.006000705965407695</recall><recall>0.9034004000470643</recall><recall>0.0891869631721379</recall></clusters></gold><gold><relation><identifier>40</identifier><frequency>0.012234379997168816</frequency><surfaceform>has child</surfaceform></relation><clusters><recall>0.0</recall><recall>0.008717811158798282</recall><recall>0.006035407725321888</recall><recall>0.002682403433476395</recall><recall>0.0018776824034334764</recall><recall>0.0</recall><recall>0.007778969957081545</recall><recall>0.0065718884120171675</recall><recall>0.22331008583690987</recall><recall>0.7430257510729614</recall></clusters></gold><gold><relation><identifier>40</identifier><reversed /><frequency>0.010457026314271783</frequency><surfaceform>has 
child</surfaceform></relation><clusters><recall>0.0</recall><recall>0.008021390374331552</recall><recall>0.007549543881723813</recall><recall>0.003617489776659327</recall><recall>0.0029883611198490093</recall><recall>0.00015728216420257942</recall><recall>0.011953444479396037</recall><recall>0.009594212016357346</recall><recall>0.23026108839257628</recall><recall>0.725857187794904</recall></clusters></gold><gold><relation><identifier>54</identifier><frequency>0.009276056057420583</frequency><surfaceform>member of</surfaceform></relation><clusters><recall>0.0008770391159445711</recall><recall>0.6621645325381512</recall><recall>0.034204525521838273</recall><recall>0.0017540782318891423</recall><recall>0.0019294860550780565</recall><recall>0.00017540782318891423</recall><recall>0.00456060340291177</recall><recall>0.003157340817400456</recall><recall>0.026311173478337134</recall><recall>0.26486581301526047</recall></clusters></gold><gold><relation><identifier>36</identifier><frequency>0.008716373149900647</frequency><surfaceform>capital of</surfaceform></relation><clusters><recall>0.11556738262687702</recall><recall>0.02109865044668314</recall><recall>0.2069948678958373</recall><recall>0.0028511689792815056</recall><recall>0.01539631248812013</recall><recall>0.027751378065006653</recall><recall>0.0043717924348983086</recall><recall>0.5873408097319901</recall><recall>0.008363429005892415</recall><recall>0.010264208325413419</recall></clusters></gold></confusion>+ \ No newline at end of file diff --git a/mainmatter/fitb/confusion vae.xml b/mainmatter/fitb/confusion vae.xml @@ -0,0 +1 @@ +<confusion><gold><relation><identifier>131</identifier><frequency>0.16362139745088683</frequency><surfaceform>located in</surfaceform></relation><clusters><recall>0.06396035639203124</recall><recall>0.5036640304334769</recall><recall>0.005155671238362199</recall><recall>0.002733006306937631</recall><recall>0.10375412954249674</recall><recall>0.12840124136550204</recall><recall>0.05251776954650115</recall><recall>0.06661327460206228</recall><recall>0.06821503654019422</recall><recall>0.004985484032435679</recall></clusters></gold><gold><relation><identifier>31</identifier><frequency>0.1504288717736312</frequency><surfaceform>instance of</surfaceform></relation><clusters><recall>0.005525402712668182</recall><recall>0.0018708056428719043</recall><recall>0.3438910582016337</recall><recall>0.24615234013857015</recall><recall>0.2639793776308204</recall><recall>0.008288104069002274</recall><recall>0.0375248806273725</recall><recall>0.009125615897497254</recall><recall>0.06730549603541479</recall><recall>0.01633691904414884</recall></clusters></gold><gold><relation><identifier>17</identifier><frequency>0.09616322293467831</frequency><surfaceform>in country</surfaceform></relation><clusters><recall>0.009213759213759214</recall><recall>0.14056101556101555</recall><recall>0.09237646737646737</recall><recall>0.06284125034125033</recall><recall>0.040455227955227956</recall><recall>0.5611179361179361</recall><recall>0.0611008736008736</recall><recall>0.013513513513513514</recall><recall>0.016175266175266174</recall><recall>0.0026446901446901447</recall></clusters></gold><gold><relation><identifier>31</identifier><reversed /><frequency>0.07373396318413698</frequency><surfaceform>instance 
of</surfaceform></relation><clusters><recall>0.004182238832532479</recall><recall>0.06199946609717032</recall><recall>0.09861630183306638</recall><recall>0.0011567894643174943</recall><recall>0.20397312689090585</recall><recall>0.002224595123687489</recall><recall>0.4453194518597615</recall><recall>0.007296672005694963</recall><recall>0.17198344901227977</recall><recall>0.003247908880583734</recall></clusters></gold><gold><relation><identifier>47</identifier><frequency>0.04465273445077674</frequency><surfaceform>shares border</surfaceform></relation><clusters><recall>0.0587396218133938</recall><recall>0.10215427380125087</recall><recall>0.025309974031674044</recall><recall>0.014886068541750485</recall><recall>0.05383855747778062</recall><recall>0.04410957902051864</recall><recall>0.21520792948319373</recall><recall>0.30360996305914195</recall><recall>0.15760213598624775</recall><recall>0.024541896785048097</recall></clusters></gold><gold><relation><identifier>47</identifier><reversed /><frequency>0.044644870053949764</frequency><surfaceform>shares border</surfaceform></relation><clusters><recall>0.060553760282021155</recall><recall>0.10102085781433608</recall><recall>0.025851938895417155</recall><recall>0.014725323149236193</recall><recall>0.05390716803760282</recall><recall>0.04461662749706228</recall><recall>0.2151880141010576</recall><recall>0.3025484723854289</recall><recall>0.1570945945945946</recall><recall>0.024493243243243243</recall></clusters></gold><gold><relation><identifier>131</identifier><reversed /><frequency>0.044184802839571546</frequency><surfaceform>located in</surfaceform></relation><clusters><recall>0.2236679753224902</recall><recall>0.08696952701439521</recall><recall>0.01697513553935315</recall><recall>0.028042624789680313</recall><recall>0.18810992708917554</recall><recall>0.0029912133108992334</recall><recall>0.07320994578425874</recall><recall>0.154122265844083</recall><recall>0.09212936997569639</recall><recall>0.13378201532996822</recall></clusters></gold><gold><relation><identifier>17</identifier><reversed /><frequency>0.035592949306098055</frequency><surfaceform>in country</surfaceform></relation><clusters><recall>0.17485310319500552</recall><recall>0.04126882115313992</recall><recall>0.030113845023870732</recall><recall>0.05614212265883217</recall><recall>0.0763863385971355</recall><recall>0.02217223650385604</recall><recall>0.3518178479618068</recall><recall>0.060227690047741464</recall><recall>0.03929489533602644</recall><recall>0.1477230995225854</recall></clusters></gold><gold><relation><identifier>161</identifier><frequency>0.026758610203792735</frequency><surfaceform>cast member of</surfaceform></relation><clusters><recall>0.008673344734913267</recall><recall>0.005436110432445639</recall><recall>0.0005497190324945028</recall><recall>0.024004397752259956</recall><recall>0.009711702907402883</recall><recall>0.00983386269240166</recall><recall>0.004703151722452968</recall><recall>0.02449303689225507</recall><recall>0.6073784510139262</recall><recall>0.3052162228194478</recall></clusters></gold><gold><relation><identifier>36</identifier><reversed /><frequency>0.01588083865927763</frequency><surfaceform>capital 
of</surfaceform></relation><clusters><recall>0.06672132827713437</recall><recall>0.2031362099005842</recall><recall>0.00020498103925386903</recall><recall>0.0030747155888080353</recall><recall>0.08383724505483242</recall><recall>0.055652352157425436</recall><recall>0.08629701752587886</recall><recall>0.3730654914420416</recall><recall>0.11858153120836322</recall><recall>0.009429127805677975</recall></clusters></gold><gold><relation><identifier>57</identifier><frequency>0.01398682975678042</frequency><surfaceform>director of</surfaceform></relation><clusters><recall>0.0011766090128250382</recall><recall>0.0018825744205200612</recall><recall>0.0</recall><recall>0.004118131544887634</recall><recall>0.005883045064125191</recall><recall>0.002706200729497588</recall><recall>0.0014119308153900459</recall><recall>0.015531238969290505</recall><recall>0.7151429579950582</recall><recall>0.2521473114484057</recall></clusters></gold><gold><relation><identifier>40</identifier><frequency>0.012234379997168816</frequency><surfaceform>has child</surfaceform></relation><clusters><recall>0.008986051502145922</recall><recall>0.02052038626609442</recall><recall>0.0037553648068669528</recall><recall>0.007913090128755365</recall><recall>0.050160944206008584</recall><recall>0.009120171673819743</recall><recall>0.01072961373390558</recall><recall>0.14578862660944206</recall><recall>0.7288090128755365</recall><recall>0.014216738197424892</recall></clusters></gold><gold><relation><identifier>40</identifier><reversed /><frequency>0.010457026314271783</frequency><surfaceform>has child</surfaceform></relation><clusters><recall>0.007706826045926392</recall><recall>0.03129915067631331</recall><recall>0.0</recall><recall>0.004403900597672224</recall><recall>0.054105064485687325</recall><recall>0.0015728216420257944</recall><recall>0.004718464926077383</recall><recall>0.12283737024221453</recall><recall>0.7551116703365839</recall><recall>0.018244731047499213</recall></clusters></gold><gold><relation><identifier>54</identifier><frequency>0.009276056057420583</frequency><surfaceform>member of</surfaceform></relation><clusters><recall>0.15593755481494476</recall><recall>0.2785476232239958</recall><recall>0.0</recall><recall>0.009822838098579197</recall><recall>0.03648482722329416</recall><recall>0.03736186633923873</recall><recall>0.00035081564637782847</recall><recall>0.007717944220312226</recall><recall>0.4427293457288195</recall><recall>0.03104718470443782</recall></clusters></gold><gold><relation><identifier>36</identifier><frequency>0.008716373149900647</frequency><surfaceform>capital of</surfaceform></relation><clusters><recall>0.11879870747006273</recall><recall>0.10036114807070899</recall><recall>0.0</recall><recall>0.008363429005892415</recall><recall>0.1064436418931762</recall><recall>0.0013305455236647026</recall><recall>0.11480707089906862</recall><recall>0.3581068237977571</recall><recall>0.15168218969777608</recall><recall>0.040106443641893176</recall></clusters></gold></confusion>+ \ No newline at end of file diff --git a/mainmatter/fitb/experiments.tex b/mainmatter/fitb/experiments.tex @@ -0,0 +1,157 @@ +\section{Experiments} +\label{sec:fitb:experiments} +To compare with previous works, we repeat the experimental setup of \textcite{vae_re} with the \bcubed{} evaluation metric \parencite{bcubed}. 
+We complement this setup with two additional datasets extracted from \textsc{t-re}x \parencite{trex} and two more metrics commonly seen in clustering task evaluation: V-measure \parencite{v-measure} and \textsc{ari} \parencite{ari}. +This allows us to capture the characteristics of each approach in more detail. + +In this section, we begin by describing the processing of the datasets in Section~\ref{sec:fitb:datasets}. +We then describe the experimental details of the models we evaluated in Section~\ref{sec:fitb:baselines}. +Finally, we give quantitative results in Section~\ref{sec:fitb:quantitative} and qualitative results in Section~\ref{sec:fitb:qualitative}. +The description of the metrics can be found in Section~\ref{sec:relation extraction:clustering}. +Appendix~\ref{chap:datasets} gives further details on the source datasets, their specificities, their sizes and some examples of their content when appropriate. + +\subsection{Datasets} +\label{sec:fitb:datasets} +As explained in Section~\ref{sec:relation extraction:unsupervised evaluation}, to evaluate the models, we use labeled datasets, the labels being used for validation and testing. +The first dataset we consider is the one of \textcite{vae_re}, which is similar to the one used in \textcite{rellda}. +This dataset was built through distant supervision (Section~\ref{sec:relation extraction:distant supervision}) by aligning sentences from the New York Times corpus (\textsc{nyt}, Section~\ref{sec:datasets:nyt}, \cite{nyt}) with Freebase (\textsc{fb}, Section~\ref{sec:datasets:freebase}, \cite{freebase}) facts. +Several sentences were filtered out based on features like the length of the dependency path between the two entities, resulting in 2 million sentences with only 41\,000 (2\%) of them labeled with one of 262 possible relations. +20\% of the labeled sentences were set aside for validation; the remaining 80\% are used to compute the final results. + +We also extracted two datasets from \textsc{t-re}x (Section~\ref{sec:datasets:trex}, \cite{trex}), which was built as an alignment of Wikipedia with Wikidata (Section~\ref{sec:datasets:wikidata}, \cite{wikidata}). +We only consider \((s, e_1, e_2)\) triplets where both entities appear in the same sentence.% +\sidenote{ + \textsc{t-re}x provides annotations for whole articles; it should therefore be possible to process broader contexts by defining \(\sentenceSet\) as a set of articles. + However, in this work, we stay in the traditional sentence-level relation extraction setup. +} +If a single sentence contains multiple triplets, it appears multiple times in the dataset, each time with a different pair of tagged entities. +We built the first dataset \textsc{ds} by extracting all triplets of \textsc{t-re}x where the two entities are linked by a relation in Wikidata. +This is the usual distant supervision method. +It results in 1\,189 relations and nearly 12 million sentences, all of them labeled with a relation. + +In Wikidata, each relation is annotated with a list of associated surface forms; for example, ``\textsl{shares border with}'' can be conveyed by ``borders,'' ``adjacent to,'' ``next to,'' etc. +The second dataset we built, \textsc{spo}, only contains the sentences where a surface form of the relation also appears in the sentence, resulting in 763\,000 samples (6\% of the unfiltered dataset) and 615 relations.
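As an illustration, a minimal sketch of this surface-form filter is given below; the alias table, relation identifier and helper name are hypothetical (the real pipeline looks up the aliases of each relation in Wikidata).

def keep_for_spo(sentence, relation, aliases):
    """Keep a distantly-supervised sample only if one of the relation's
    surface forms (aliases) appears in the sentence text."""
    text = sentence.lower()
    return any(form.lower() in text for form in aliases.get(relation, []))

# Hypothetical alias table for ``shares border with''.
aliases = {"P47": ["shares border with", "borders", "adjacent to", "next to"]}
print(keep_for_spo("France borders Spain along the Pyrenees.", "P47", aliases))  # True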
+This dataset still contains some misalignment, but it should nevertheless be easier for models to extract the correct semantic relation since the set of surface forms is much more restricted and much more regular. + +\subsection{Baselines and Models} +\label{sec:fitb:baselines} +We compare our model with three state-of-the-art approaches: the two generative rel-\textsc{lda} models of \textcite{rellda}, the \textsc{vae} model of \textcite{vae_re} and the deep clustering of \textsc{bert} representations by \textcite{selfore}. + +The two rel-\textsc{lda} models differ only in the number of features considered. +We use the eight features listed in \textcite{vae_re}: +\begin{enumerate} + \item the bag of words of the infix; + \item the surface form of the entities; + \item the lemma words on the dependency path; + \item the \textsc{pos} of the infix words; + \item the type of the entity pair (e.g.\ person--location); + \item the type of the head entity (e.g.\ person); + \item the type of the tail entity (e.g.\ location); + \item the words on the dependency path between the two entities. +\end{enumerate} +Rel-\textsc{lda} uses the first three features, while rel-\textsc{lda}1 is trained by iteratively adding more features until all eight are used. + +To assess our two main contributions individually, we evaluate the \textsc{pcnn} classifier and our additional losses separately. +More precisely, we first study the effect of the RelDist losses by looking at the differences between models optimizing \(\loss{ep}+\loss{vae reg}\) and the ones optimizing \(\loss{ep}+\loss{s}+\loss{d}\), with \loss{ep} computed using either the relation classifier of \textcite{vae_re} or our \textsc{pcnn}. +Second, we study the effect of the relation classifier by comparing the feature-based classifier and the \textsc{pcnn} trained with the same losses. +We also give results for our RelDist losses together with a \bertcoder{} classifier. +This latter combination is evaluated by \textcite{selfore} following our experimental setup. +We thus focus mainly on four models: +\begin{itemize} + \item \(\text{Linear}+\loss{vae reg}\), which corresponds to the model of \textcite{vae_re}; + \item \(\text{Linear}+\loss{s}+\loss{d}\), which uses the feature-based linear encoder of \textcite{vae_re} together with our RelDist losses; + \item \(\textsc{pcnn}+\loss{vae reg}\), which uses our \textsc{pcnn} encoder together with the regularization of \textcite{vae_re}; + \item \(\textsc{pcnn}+\loss{s}+\loss{d}\), which is our complete model. +\end{itemize} + +All models are trained with ten relation classes, which, while lower than the number of actual relations, allows us to compare the models faithfully since the distribution of gold relations is very unbalanced. +For feature-based models, the size of the feature domain ranges from 1 to 10~million values depending on the dataset. +We train our models with Adam using \(L_2\) regularization on all parameters. +To have a good estimation of \(P(\rndm{R})\) in the computation of \(\loss{d}\), we use a batch size of 100. +Our word embeddings are of size 50, and entity embeddings of size \(m=10\). +We sample \(k=5\) negative samples to estimate \(\loss{ep}\). +Lastly, we set \(\alpha=0.01\) and \(\beta=0.02\). +All three datasets come with a validation set, and following \textcite{vae_re}, we use it for cross-validation to optimize the \bcubed{} \fone{}.
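Since this validation criterion is central in what follows, here is a minimal sketch of one common formulation of the per-item \bcubed{} precision, recall and \fone{}; it assumes only two label lists (predicted clusters and gold relations), and the function name is ours. V-measure and \textsc{ari} are available directly in scikit-learn as v_measure_score and adjusted_rand_score.

from collections import Counter

def b_cubed(pred, gold):
    """Per-item B-cubed precision and recall, averaged over the dataset.
    pred[i] is the predicted cluster of item i, gold[i] its gold relation."""
    pair = Counter(zip(pred, gold))   # items sharing both predicted and gold label
    pred_size = Counter(pred)         # predicted cluster sizes
    gold_size = Counter(gold)         # gold relation sizes
    n = len(pred)
    precision = sum(c * c / pred_size[p] for (p, g), c in pair.items()) / n
    recall = sum(c * c / gold_size[g] for (p, g), c in pair.items()) / n
    return precision, recall, 2 * precision * recall / (precision + recall)

print(b_cubed([0, 0, 1, 1], ["located in", "located in", "located in", "capital of"]))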
+ +\subsection{Results} +\label{sec:fitb:quantitative} +\begin{table*}[t] + \centering + \input{mainmatter/fitb/quantitative.tex} + \scaption[Quantitative results of clustering models.]{ + Results (percentage) on our three datasets. + The results for rel-\textsc{lda}, rel-\textsc{lda}1, Linear and \textsc{pcnn} are our own, while results for \bertcoder{} and Self\textsc{ore}, marked with \(^\dagger\), are from \textcite{selfore}. + The best results at the time of publication of our article are in \strong{bold}, while the best results at the time of writing are in \emph{italic}. + \label{tab:fitb:quantitative} + } +\end{table*} +The results reported in Table~\ref{tab:fitb:quantitative} are the average test scores of three runs on the \nytfb{} and \trexspo{} datasets, using different random initializations of the parameters---in practice, the variance was low enough that the reported results can be analyzed. +We observe that regardless of the model and metrics, the highest measures are obtained on \trexspo{}, then \nytfb{} and finally \trexds{}. +This was to be expected since \trexspo{} was built to be easy, while hard-to-process sentences were filtered out of \nytfb{} \parencite{rellda, vae_re}. +We also observe that the main metrics (\bcubed{}, V-measure and \textsc{ari}) agree in most cases. +Performing a \textsc{pca} on the measures, we observed that V-measure forms a nearly-orthogonal axis to \bcubed{}, and to a lesser extent \textsc{ari}. +Hence we can focus on \bcubed{} and V-measure in our analysis. + +We first measure the benefit of our RelDist losses: on all datasets and metrics, the two models using \(\loss{s}+\loss{d}\) are systematically better than the ones using \loss{vae reg}: +\begin{itemize} + \item The \textsc{pcnn} models consistently gain between 7 and 11 points in \bcubed{} \fone{} from these additional losses; + \item The feature-based linear classifier benefits from the RelDist losses to a lesser extent, except on the \trexds{} dataset on which the \(\text{Linear}+\loss{vae reg}\) model without the RelDist losses completely collapses---we hypothesize that this dataset is too hard for the model given the number of parameters to estimate. +\end{itemize} + +We now restrict our attention to discriminative models based on \(\loss{s}+\loss{d}\). +We note that both relation classifiers (Linear and \textsc{pcnn}) exhibit better performance than the generative models (rel-\textsc{lda}, rel-\textsc{lda}1), with a difference ranging from 2.5/0.6 (\nytfb{}, for Linear/\textsc{pcnn}) to 11/17.8 (on \trexspo{}). +However, the advantage of \textsc{pcnn}s over feature-based classifiers is not completely clear. +While the \textsc{pcnn} version has a systematically better \bcubed{} \fone{} on all datasets (differences of 1.9/6.8/0.2 respectively for \nytfb{}/\trexspo{}/\trexds{}), the V-measure decreases by 0.4/4.0 respectively on \nytfb{}/\trexds{}, and \textsc{ari} by 2.1 on \trexds{}. +As \bcubed{} \fone{} was used for validation, this shows that the \textsc{pcnn} models overfit this metric, either by polluting relatively clean clusters with unrelated sentences or by splitting well-clustered gold relations into two clusters. + +The \bertcoder{} classifier improves all metrics consistently, with the sole exception of the V-measure on the \trexspo{} dataset. +This can be explained both by the larger expressive power of \textsc{bert} and by its pretraining as a language model. +The Self\textsc{ore} model, which is built on top of a \bertcoder{}, further improves the results on all datasets.
+Since these results are from a subsequent work \parencite{selfore}, we will not delve too much into the details. +As mentioned in Section~\ref{sec:relation extraction:selfore}, Self\textsc{ore} is an iterative algorithm; the \hypothesis{uniform} assumption is enforced on the whole dataset at once, thus solving \problem{2}. +To solve \problem{1}, Self\textsc{ore} uses a concentration objective (through the square in the target distribution \(\mtrx{P}\) in Equation~\ref{eq:relation extraction:selfore target}). +While the \bertcoder{} can replace our \textsc{pcnn} classifier and can be evaluated with our regularization losses, the Self\textsc{ore} algorithm is a replacement for \(\loss{ep}+\loss{s}+\loss{d}\) and cannot be used jointly with \(\loss{s}+\loss{d}\). +In theory, the Self\textsc{ore} algorithm could be used with a linear or \textsc{pcnn} encoder. +However, Self\textsc{ore} strongly relies on a good initial representation; such a model would need to be pre-trained as a language model beforehand. + +\subsection{Qualitative Analysis} +\label{sec:fitb:qualitative} +\begin{figure*}[t] + \centering + \renderConfusions + {mainmatter/fitb/confusion lda.xml}{Rel-\textsc{lda}1} + {mainmatter/fitb/confusion vae.xml}{\(\text{Linear}+\loss{vae reg}\)} + {mainmatter/fitb/confusion regularized vae.xml}{\(\text{Linear}+\loss{s}+\loss{d}\)} + {mainmatter/fitb/confusion pcnn.xml}{\(\textsc{pcnn}+\loss{s}+\loss{d}\)} + \vspace{-7mm} + \scaption[Confusion matrices on the \trexspo{} dataset.]{ + Normalized confusion matrices for the \trexspo{} dataset. + For each model, each of the 10 columns corresponds to a predicted relation cluster; the columns were sorted to ease comparison. + The rows identify Wikidata relations sorted by their frequency in the \trexspo{} corpus (reported as a percentage in front of each relation name). + The area of each circle is proportional to the number of sentences in the cell. + For clarity, each matrix was normalized so that each row sums to 1; it is thus more akin to a \bcubed{} per-item recall than a true confusion matrix. + \label{fig:fitb:confusion} + } +\end{figure*} + +Since, for our model of interest, all the metrics agree on the \trexspo{} dataset, we plot the confusion matrices of our models in Figure~\ref{fig:fitb:confusion}. +Each row is labeled with the gold Wikidata relation extracted through distant supervision. +For example, the top left cell of each matrix corresponds to the value \(P\mkern1mu\big(c(\rndm{X})=0\mathrel{\big|} g(\rndm{X})=\text{``}\sfTripletHolds{e_1}{located in}{e_2}\text{''}\big)\) using the notation of Section~\ref{sec:relation extraction:unsupervised evaluation}. +Since relations are generally not symmetric, each Wikidata relation appears twice in the table, once for each ordering of the entities in the sentence. +This is particularly problematic with symmetric relations such as ``shares border,'' which yield two different gold relations that actually convey the same semantic relation. + +To interpret Figure~\ref{fig:fitb:confusion}, we have to see whether a predicted cluster (column) contains different gold relations---paying attention to the fact that the most important gold relations are listed in the top rows (the top 5 relations account for 50\% of sentences). +The first thing to notice is that the confusion matrices of both models using our RelDist losses (\(\loss{s}+\loss{d}\)) are sparser (for each column), which means that our models better separate relations from each other.
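The row normalization described in the caption can be sketched as follows (a toy example with hypothetical label arrays and function name, not the actual plotting code):

import numpy as np

def normalized_confusion(gold, pred, n_gold, n_pred):
    """Count matrix with one row per gold relation and one column per predicted
    cluster; each row is then normalized to sum to 1 (a per-relation recall)."""
    counts = np.zeros((n_gold, n_pred))
    for g, p in zip(gold, pred):
        counts[g, p] += 1
    return counts / counts.sum(axis=1, keepdims=True)

print(normalized_confusion([0, 0, 1, 1], [0, 1, 1, 1], n_gold=2, n_pred=2))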
+We observe that \(\text{Linear}+\loss{vae reg}\) (the model of \cite{vae_re}) is affected by the pitfall \problem{1} (uniform distribution) for many gold clusters. +The \loss{vae reg} loss forces the classifier to be uncertain about which relation is expressed, translating into a dense confusion matrix and resulting in poor performance. +The rel-\textsc{lda}1 model is even worse and fails to identify clear clusters, showing the limitations of a purely generative approach that might focus on features not linked with any relation. + +Focusing on our proposed model, \(\textsc{pcnn}+\loss{s}+\loss{d}\) (rightmost figure), we looked at two different mistakes. +The first is a gold cluster divided into two (low recall). +When looking at clusters 0 and 1, we did not find any recognizable pattern. +Moreover, the corresponding entity predictor parameters are very similar. +This seems to be a limitation of the distance loss: splitting a large cluster into two may improve \loss{d} but worsen all the evaluation metrics. +The model is then penalized by the fact that it lost one slot to transmit information between the classifier and the entity predictor. +The second type of mistake is when a predicted cluster corresponds to two gold ones (low precision). +Here, most of the mistakes seem understandable: ``shares border'' is symmetric (cluster 7), ``located in'' and ``in country'' (cluster 8) or ``cast member'' and ``director of'' (cluster 9) are clearly related. +Note that the other variants are similarly affected, showing that the problem of granularity is complex. diff --git a/mainmatter/fitb/fitb split.tex b/mainmatter/fitb/fitb split.tex @@ -0,0 +1,23 @@ +\begin{tikzpicture}[ + piece/.style={text depth=0, minimum width=7mm}, + defbrace/.style={decorate, decoration={brace, amplitude=5}}, + undernote/.style={below, midway, text depth=0, yshift=-1mm}, + overnote/.style={above, midway, text depth=0, yshift=2mm}, + baseline=(taillabel.base), + ] + %The <e1>sol</e1> was the currency of <e2>Peru</e2> between 1863 and 1985 . + \matrix[name=M1, matrix of nodes, inner sep=0.8mm, column sep=0]{ + \node[piece] (prefix) {The}; & + \node[piece] (head) {\textbf{sol}}; & + \node[piece] (infix) {was the currency of}; & + \node[piece] (tail) {\textbf{Peru}}; & + \node[piece] (suffix) {between 1863 and 1985.}; \\ + }; + + \draw [defbrace] (prefix.south east) -- (prefix.south west) node [undernote] {\strut prefix}; + \draw [defbrace] (infix.south east) -- (infix.south west) node [undernote] {\strut infix}; + \draw [defbrace] (suffix.south east) -- (suffix.south west) node [undernote] {\strut suffix}; + \draw [defbrace] (head.north west) -- (head.north east) node [overnote] {\strut head entity}; + \draw [defbrace] (tail.north west) -- (tail.north east) node [overnote] (taillabel) {\strut tail entity}; +\end{tikzpicture} +\vspace{-2mm} diff --git a/mainmatter/fitb/gumbel.tex b/mainmatter/fitb/gumbel.tex @@ -0,0 +1,12 @@ +\adjustbox{valign=t}{% +\begin{tabular}{c r r r r r r r} + \toprule + \multirow{2}{*}{\problem{1} solution} & \multicolumn{3}{c}{\bcubed} & \multicolumn{3}{c}{V-measure} & \multirow{2}{*}{\textsc{ari}} \\ + \cmidrule(lr){2-4}\cmidrule(lr){5-7} + & \fone & Prec. & Rec. & \fone & Hom. & Comp.
& \\ + \midrule + \loss{s} regularization & 39.4 & 32.2 & 50.7 & 38.3 & 32.2 & 47.2 & 33.8 \\ + Gumbel--Softmax & 35.0 & 29.9 & 42.2 & 33.2 & 28.3 & 40.2 & 25.1 \\ + \bottomrule +\end{tabular}% +}% diff --git a/mainmatter/fitb/introduction.tex b/mainmatter/fitb/introduction.tex @@ -0,0 +1,43 @@ +\begin{marginparagraph} + \citationBadness + This chapter is an adaptation of an article published at \textsc{acl} with some supplementary results:\\ + \fullcite{fitb} +\end{marginparagraph} +All the works presented thus far follow the same underlying dynamic. +There is a movement away from symbolic representations toward distributed ones, as well as a movement away from shallow models toward deeper ones. +This can be seen in word, sentence and knowledge base representations (Chapter~\ref{chap:context}), as well as in relation extraction (Chapter~\ref{chap:relation extraction}). +As we showed in Chapter~\ref{chap:relation extraction}, a considerable amount of work has been conducted on supervised or weakly-supervised relation extraction (Sections~\ref{sec:relation extraction:sentential} and~\ref{sec:relation extraction:aggregate}), with recent state-of-the-art models using deep neural networks (Section~\ref{sec:relation extraction:pcnn}). +However, human annotation of text with knowledge base triplets is expensive and virtually impractical when the number of relations is large. +Weakly-supervised methods such as distant supervision (Section~\ref{sec:relation extraction:distant supervision}) are also restricted to a handcrafted relation domain. +Going further, purely unsupervised relation extraction methods working on raw texts, without any access to a knowledge base, have been developed (Section~\ref{sec:relation extraction:unsupervised}). + +The first unsupervised models used a clustering (Section~\ref{sec:relation extraction:hasegawa}) or generative (Section~\ref{sec:relation extraction:rellda}) approach. +The latter, which obtained state-of-the-art performance, still makes many simplifying hypotheses, such as \hypothesis{biclique}, which assumes that the entities are conditionally independent of each other given the relation. +We posit that discriminative approaches can further improve expressiveness, especially considering recent results with neural network models. +The open question then becomes how to provide a sufficient learning signal to the classifier. +The \textsc{vae} model of \textcite{vae_re} introduced in Section~\ref{sec:relation extraction:vae} followed this path by leveraging representation learning for modeling knowledge bases and proposed to use an auto-encoder model: their encoder extracts the relation from a sentence; the decoder then uses this relation to predict a missing entity. +However, their encoder is still limited compared to its supervised counterpart (e.g.~\textsc{pcnn}) and relies on handcrafted features extracted by natural language processing tools (Section~\ref{sec:relation extraction:hand-designed features}). +These features tend to contain errors and prevent the discovery of new patterns, which might hinder performance. + +While the transition to deep learning approaches can bring more expressive models to the task, it also raises new problems. +This chapter tackles a problem specific to unsupervised discriminative relation extraction models. +In particular, we focus on the \textsc{vae} model of Section~\ref{sec:relation extraction:vae}.
+These models tend to be hard to train because of the way \hypothesis{uniform} is enforced, that is, how we ensure that all relations are conveyed equally often.% +\sidenote{However, this problem can be generalized to how we enforce that all relations are conveyed reasonably often.} +To tackle this issue, we propose two new regularizing losses on the distribution of relations. +With these, we hope to leverage the expressivity of discriminative approaches---in particular, of deep neural network classifiers---while staying in an unsupervised setting. +Indeed, these models are hard to train without supervision, and the solutions proposed at the time were unstable. +Discriminative approaches have less inductive bias, but this makes them more sensitive to noise. + +In practice, our initial experiments showed that the \textsc{vae} relation extraction model was unstable, especially when using a deep neural network relation classifier. +It converges to either of the two following regimes, depending on hyperparameter settings: always predicting the same relation or predicting a uniform distribution. +To overcome these limitations, we propose to use two new losses alongside an entity prediction loss based on a fill-in-the-blank task and show experimentally that this is key to learning deep neural network models. +Our contributions are the following: +\begin{itemize} + \item We propose two RelDist losses: a skewness loss, which encourages the classifier to predict a class with confidence for a single sentence, and a distribution distance loss, which encourages the classifier to scatter a set of sentences into different classes; + \item We perform extensive experiments on the usual \nytfb{} dataset, as well as two new datasets; + \item We show that our RelDist losses allow us to train a deep \textsc{pcnn} classifier and improve the performance of feature-based models. +\end{itemize} + +In this chapter, we first describe our model in Section~\ref{sec:fitb:model} before revisiting the related works pertinent to the experimental setup in Section~\ref{sec:fitb:related work}. +We present our main experimental results in Section~\ref{sec:fitb:experiments} before studying some possible improvements we considered in Section~\ref{sec:fitb:variants}. diff --git a/mainmatter/fitb/model.tex b/mainmatter/fitb/model.tex @@ -0,0 +1,232 @@ +\section{Model description} +\label{sec:fitb:model} +Our model focuses on extracting the relation between two entities in textual data and assumes that an entity chunker has identified named entities in the text. +Furthermore, following Section~\ref{sec:relation extraction:definition}, we limit ourselves to binary relations and therefore consider sentences with two tagged entities, as shown in Figure~\ref{fig:fitb:split}. +These sentences constitute the set \(\sentenceSet\). +We further assume that entity linking was performed and that we have access to entity identifiers from the set \(\entitySet\). +We therefore consider samples from a dataset \(\dataSet\subseteq\sentenceSet\times\entitySet^2\). +From these samples we learn a relation classifier that maps each sample \(x\in\dataSet\) to a relation \(r\in\relationSet\). +As such, our approach is sentential (Section~\ref{sec:relation extraction:definition}). + +To provide a supervision signal to our relation classifier, we follow the \textsc{vae} model of Section~\ref{sec:relation extraction:vae} \parencitex{vae_re}.
+However, the interpretation of their model as a \textsc{vae} is part of the limitations we observed and is in conflict with the modifications we introduce. +We therefore reformulate their approach as a \emph{fill-in-the-blank} task: +\begin{indentedexample} + ``The \uhead{sol} was the currency of \utail{~?~} between 1863 and 1985.'' +\end{indentedexample} +To correctly fill in the blank, we could directly learn to predict the missing entity; but in this case, we would not be able to learn a relation classifier. +Instead, we first want to learn that this sentence expresses the semantic relation ``currency used by'' before using this information for a (self-)supervised entity prediction task. +To this end, we make the following assumption: +\begin{assumption}{blankable} + The relation can be predicted by the text surrounding the two entities alone. + Formally, using \(\operatorname{blanked}(s)\) to designate the tagged sentence \(s\in\sentenceSet\) from which the entities' surface forms were removed, we can write: + + \smallskip + \noindent + \( \displaystyle \rndm{r} \independent \rndmvctr{e} \mid \operatorname{blanked}(\rndm{s}) \). +\end{assumption} + +Furthermore, since the information distinguishing \(\rndm{s}\) from \(\operatorname{blanked}(\rndm{s})\) is determined by \(\rndmvctr{e}\), as a corollary of \hypothesis{blankable}, we have the equivalence \(P(\rndm{r}\mid \rndm{s}) = P(\rndm{r}\mid \operatorname{blanked}(\rndm{s}))\). +Using this assumption and the above observation about filling blanked entities, we design a surrogate fill-in-the-blank task to train a relation extraction model. +This task takes the point of view that a relation is something that allows us to predict \(e_2\) from \(e_1\) and vice versa. +Our goal is to predict a missing entity \(e_{-i}\) given the predicted relation \(r\) and the other entity \(e_i\): +\begin{marginparagraph} + Derivation of Equation~\ref{eq:fitb:model}:\\ + \(P(e_{-i}\mid s, e_i)\)\\ + First introduce and marginalize the latent relation variable \(r\) (``sum rule''):\\ + \null\hfill\(\displaystyle= \sum_{r\in\relationSet} P(r, e_{-i}\mid s, e_i)\)\\ + Apply the definition of conditional probability (``product rule''):\\ + \null\hfill\(\displaystyle= \sum_{r\in\relationSet} P(r\mid s, e_i) P(e_{-i}\mid r, s, e_i)\)\\ + Apply the independence \hypothesis{blankable} assumption on the first term and our definition of a relation on the second:\\ + \null\hfill\(\displaystyle= \sum_{r\in\relationSet} P(r\mid s) P(e_{-i}\mid r, e_i)\)\\ + Furthermore, by applying the corollary of \hypothesis{blankable}, we can write:\\ + \null\hfill\(\displaystyle= \sum_{r\in\relationSet} P(r\mid \operatorname{blanked}(s)) P(e_{-i}\mid r, e_i)\) +\end{marginparagraph} +\begin{equation} + P(e_{-i} \mid s, e_i) = + \sum_{r\in\relationSet} \underbrace{P(r\mid s)}_{\text{(i) classifier}} \underbrace{P(e_{-i} \mid r, e_i)}_{\text{(ii) entity predictor}} \qquad \text{for } i=1, 2, + \label{eq:fitb:model} +\end{equation} +where \(e_1, e_2\in\entitySet\) are the two entity identifiers, \(s\in\sentenceSet\) is the sentence mentioning them, and \(r\in\relationSet\) is the relation linking them. +As the entity predictor can consider either entity, we use \(e_i\) to designate the given entity, and \(e_{-i}=\{e_1, e_2\}\setminus \{e_i\}\) the one to predict. + +The relation classifier \(P(r\mid s)\) and entity predictor \(P(e_{-i}\mid r, e_i)\) are trained jointly to discover a missing entity, with the constraint that the entity predictor cannot access the input sentence directly.
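To make the factorization of Equation~\ref{eq:fitb:model} concrete, here is a toy numerical sketch (random distributions, not the actual model): the missing-entity distribution is obtained by mixing the entity predictor's per-relation distributions with the classifier's output, so the sentence itself never reaches the entity predictor.

import numpy as np

n_relations, n_entities = 3, 5
rng = np.random.default_rng(0)

# (i) classifier: P(r | s), a distribution over relations for one sentence
p_r_given_s = rng.dirichlet(np.ones(n_relations))
# (ii) entity predictor: P(e_{-i} | r, e_i), one row per relation for a fixed e_i
p_e_given_r = rng.dirichlet(np.ones(n_entities), size=n_relations)

# P(e_{-i} | s, e_i) = sum_r P(r | s) * P(e_{-i} | r, e_i)
p_e_given_s = p_r_given_s @ p_e_given_r
print(p_e_given_s, p_e_given_s.sum())  # a proper distribution over entities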
+Thus, all the required information must be condensed into \(r\), which acts as a bottleneck. +We advocate that this information is the semantic relation between the two entities. + +Note that \textcite{vae_re} did not make the \hypothesis{blankable} hypothesis. +Instead, their classifier is conditioned on both \(e_i\) and \(e_{-i}\), strongly relying on the fact that \(r\) is an information bottleneck and will not ``leak'' the identity of \(e_{-i}\). +This is possible since they use pre-defined sentence representations; this is impossible to enforce with the learned representations of a deep neural network. + +In the following, we first describe the relation classifier \(P(r\mid s)\) in Section~\ref{sec:fitb:classifier} before introducing the entity predictor \(P(e_{-i}\mid r, e_i)\) in Section~\ref{sec:fitb:entity predictor}. +Arguing that the resulting model is unstable, we describe the two new RelDist losses in Section~\ref{sec:fitb:regularization}. + +\begin{figure}[t] + \centering + \input{mainmatter/fitb/fitb split.tex} + \scaption*[Fill-in-the-blanks sentence partition.]{ + A sentence from Wikipedia where the conveyed relation is ``\textsl{currency used by}.'' + In contrast to Figure~\ref{fig:relation extraction:dipre split}, which presented \textsc{dipre}'s split-in-three-affixes, we do not label the entities' surface forms with \(e_1\) and \(e_2\) to avoid confusion with entity identifiers. + \label{fig:fitb:split} + } +\end{figure} + +\subsection{Unsupervised Relation Classifier} +\label{sec:fitb:classifier} + +Our model for \(P(r\mid s)\) follows the then state-of-the-art practices for supervised relation extraction by using a piecewise convolutional neural network (\textsc{pcnn}, Section~\ref{sec:relation extraction:pcnn}, \citex{pcnn}). +Similarly to \textsc{dipre}'s split-in-three-affixes, the input sentence can be split into three parts separated by the two entities (see Figure~\ref{fig:fitb:split}). +In a \textsc{pcnn}, the model outputs a representation for each part of the sentence. +These are then combined to make a prediction. +Figure~\ref{fig:relation extraction:pcnn} shows the network architecture that we now describe. + +First, each word of \(s\) is mapped to a real-valued vector. +In this work, we use standard word embeddings, initialized with GloVe% +\sidenote{We use the \texttt{6B.50d} pre-trained word embeddings from \url{https://nlp.stanford.edu/projects/glove/}.} +(Section~\ref{sec:context:word2vec}, \cite{glove}), and fine-tune them during training. +Based on those embeddings, a convolutional layer detects patterns in subsequences of words. +Then, a max-pooling along the text length combines all features into a fixed-size representation. +Note that in our architecture, we obtained better results by using three distinct convolutions, one for each sentence part (i.e.~the weights are not shared). +We then apply a non-linear function (\(\tanh\)) and sum the three vectors into a single representation for \(s\). +Finally, this representation is fed to a softmax layer to predict the distribution over the relations. +This distribution can be plugged into Equation~\ref{eq:fitb:model}. +Denoting \(\operatorname{\textsc{pcnn}}\) our classifier, we have: +\begin{equation*} + P(r\mid s) = \operatorname{\textsc{pcnn}}(r; s, \vctr{\phi}), +\end{equation*}
+Note that we can use the \textsc{pcnn} to predict the relationship for any pair of entities appearing in any sentence since the input will be different for each selected pair (see Figure~\ref{fig:relation extraction:pcnn}). +Furthermore, since the \textsc{pcnn} ignore the entities surface forms, we can have \(P(r\mid s) = P(r\mid \operatorname{blanked}(s))\) which is necessary to enforce \hypothesis{blankable}. + +\subsection{Entity Predictor} +\label{sec:fitb:entity predictor} +The purpose of the entity predictor is to provide supervision for the relation classifier. +As such, it needs to be differentiable. +We follow \textcite{vae_re} to model \(P(e_i \mid r, e_{-i})\), and use an energy-based formalism, where \(\psi(e_1, r, e_2)\) is the energy associated with \((e_1,r,e_2)\). The probability is obtained as follows: +\begin{equation} + P(e_1 \mid r, e_2) = \frac + {\exp(\psi(e_1, r, e_2))} + {\sum_{e'\in\entitySet} \exp(\psi(e', r, e_2))}, + \label{eq:fitb:entity predictor softmax} +\end{equation} +where \(\psi\) is expressed as the sum of two standard relational learning models selectional preferences (Section~\ref{sec:context:selectional preferences}) and \textsc{rescal} (Section~\ref{sec:context:rescal}): +\begin{equation*} + \psi(e_1, r, e_2; \vctr{\theta}) = \underbrace{\vctr{u}_{e_1}\transpose \vctr{a}_r + \vctr{u}_{e_2}\transpose \vctr{b}_r}_\text{Selectional Preferences} + \underbrace{\vctr{u}_{e_1}\transpose \mtrx{C}_r \vctr{u}_{e_2}}_\textsc{rescal} +\end{equation*} +where \(\mtrx{U}\in\symbb{R}^{\entitySet\times m}\) is an entity embedding matrix, \(\mtrx{A}, \mtrx{B}\in\symbb{R}^{\relationSet\times m}\) are two matrices encoding the preferences of each relation of certain entities, \(\tnsr{C}\in\symbb{R}^{\relationSet\times m\times m}\) is a three-way tensor encoding the entities interactions, and the hyperparameter \(m\) is the dimension of the embedded entities. +The function \(\psi\) also depends on the energy functions parameters \(\vctr{\theta}=\{\tnsr{A}, \mtrx{B}, \mtrx{C}, \mtrx{U}\}\) that we might omit for legibility. +\textsc{rescal} \parencite{rescal} uses a bilinear tensor product to gauge the compatibility of the two entities; whereas, in the Selectional Preferences model, only the predisposition of an entity to appear as the subject or object of a relation is captured. + +\paragraph{Negative Sampling} +The number of entities being very large, the partition function of Equation~\ref{eq:fitb:entity predictor softmax} cannot be efficiently computed. +To avoid the summation over the set of entities, we follow Section~\ref{sec:context:negative sampling} and use negative sampling \parencite{word2vec_follow-up}; instead of training a softmax classifier, we train a discriminator which tries to recognize real triplets (\(\rndm{D}=1\)) from fake ones (\(\rndm{D}=0\)): +\begin{equation*} + P(\rndm{D}=1\mid e_1, e_2, r) = \sigmoid \left( \psi(e_1, r, e_2) \right), +\end{equation*} +where \(\sigmoid(x) = 1 \divslash (1 + \exp(-x))\) is the sigmoid function. 
+This model is then trained by generating negative entities for each position and optimizing the negative log-likelihood:
+\begin{equation}
+    \begin{split}
+        \loss{ep}(\vctr{\theta}, \vctr{\phi}) = \expectation_{\substack{(\rndm{s}, \rndm{e}_1, \rndm{e}_2)\sim \uniformDistribution(\dataSet)\\\rndm{r}\sim \operatorname{\textsc{pcnn}}(\rndm{s}; \vctr{\phi})}} \bigg[ & - \log \sigmoid \left( \psi(\rndm{e}_1, \rndm{r}, \rndm{e}_2; \vctr{\theta}) + b_{\rndm{e}_1}\right) \\
+        & - \log \sigmoid \left( \psi(\rndm{e}_1, \rndm{r}, \rndm{e}_2; \vctr{\theta}) + b_{\rndm{e}_2}\right) \\
+        & - \sum_{j=1}^k \expectation_{\rndm{e}'\sim\uniformDistribution_\dataSet(\entitySet)} \left[ \log \sigmoid \left( - \psi(\rndm{e}_1, \rndm{r}, \rndm{e}'; \vctr{\theta}) - b_{\rndm{e}'} \right) \right] \\
+        & - \sum_{j=1}^k \expectation_{\rndm{e}'\sim\uniformDistribution_\dataSet(\entitySet)} \left[ \log \sigmoid \left( - \psi(\rndm{e}', \rndm{r}, \rndm{e}_2; \vctr{\theta}) - b_{\rndm{e}'} \right) \right] \bigg]
+    \end{split}
+    \label{eq:fitb:entity prediction loss}
+\end{equation}
+This loss is defined over the empirical data distribution \(\uniformDistribution(\dataSet)\), i.e.~the samples \((\rndm{s}, \rndm{e}_1, \rndm{e}_2)\) follow a uniform distribution over sentences tagged with two entities; and the empirical entity distribution \(\uniformDistribution_\dataSet(\entitySet)\), that is the categorical distribution over \(\entitySet\) where each entity is weighted by its frequency in \(\dataSet\).
+The distribution of the relation \(\rndm{r}\) for the sentence \(\rndm{s}\) is then given by the classifier \(\operatorname{\textsc{pcnn}}(\rndm{s}; \vctr{\phi})\), which corresponds to the \(\sum_{r\in\relationSet} P(r\mid s)\) term in Equation~\ref{eq:fitb:model}.
+Following standard practice, during training, the expectation on negative entities is approximated by sampling \(k\) random entities following the empirical entity distribution \(\uniformDistribution_\dataSet(\entitySet)\) for each position.
+
+\paragraph{Biases}
+Following \textcite{vae_re}, we add entity biases to \(\psi\).
+These biases are parametrized by a single vector \(\vctr{b}\in\symbb{R}^\entitySet\).
+They encode the fact that some entities are more likely to appear than others; as such, the \(+\vctr{b}_{e_i}\) terms appear in \loss{ep} where \(P(e_i\mid r, e_{-i})\) would appear in the negative sampling estimation.
+
+\paragraph{Approximation}
+When \(|\relationSet|\) is large, the expectation over \(\rndm{r}\sim \operatorname{\textsc{pcnn}}(\rndm{s}; \vctr{\phi})\) can be slow to evaluate.
+To avoid computing \(\psi\) for all possible relations \(r\in\relationSet\), we employ an optimization also used by \textcite{vae_re}.
+This optimization is built upon the following approximation:
+\begin{equation}
+    \expectation_{\rndm{r}\sim \operatorname{\textsc{pcnn}}(\rndm{s}; \vctr{\phi})}[ \log \sigmoid (\psi(\rndm{e}_1, \rndm{r}, \rndm{e}_2; \vctr{\theta}))] \approx \log \sigmoid \left(\expectation_{\rndm{r}\sim \operatorname{\textsc{pcnn}}(\rndm{s}; \vctr{\phi})}\left[\psi(\rndm{e}_1, \rndm{r}, \rndm{e}_2; \vctr{\theta})\right]\right).
+\end{equation}
+Since the function \(\psi\) is linear in \(r\), we can efficiently compute its expected value over \(r\) using the convex combination of the relation embeddings.
+For example, we can replace the selectional preference of a relation \(r\) for a head entity \(e_1\), \(\vctr{u}_{e_1}\transpose \vctr{a}_r\), by the selectional preference of a distribution \(\operatorname{\textsc{pcnn}}(s; \vctr{\phi})\) for that head entity, \(\vctr{u}_{e_1}\transpose (\operatorname{\textsc{pcnn}}(s; \vctr{\phi})\transpose \mtrx{A})\).
+
+\subsection{RelDist Losses}
+\label{sec:fitb:regularization}
+Training the classifier through Equation~\ref{eq:fitb:entity prediction loss} alone is very unstable and dependent on precise hyperparameter tuning.
+More precisely, according to our early experiments, the training process usually collapses into one of two regimes:
+\begin{marginfigure}
+    \centering
+    \input{mainmatter/fitb/problem 1.tex}
+    \scaption[Illustration of \problem{1}.]{
+        Illustration of \problem{1}.
+        The classifier assigns roughly the same probability to all relations.
+        Instead, we would like the classifier to predict a single relation confidently.
+        \label{fig:fitb:problem 1}
+    }
+\end{marginfigure}%
+\begin{marginfigure}
+    \vspace{5mm}
+    \centering
+    \input{mainmatter/fitb/problem 2.tex}
+    \scaption[Illustration of \problem{2}.]{
+        Illustration of \problem{2}.
+        The classifier consistently predicts the same relation.
+        This is clearly visible when taking the average distribution (by marginalizing over the sentences \(\rndm{s}\)).
+        Instead, we would like the classifier to predict a diverse set of relations.
+        \label{fig:fitb:problem 2}
+    }
+\end{marginfigure}
+\begin{description}[style=multiline, labelwidth=\widthof{(\problem{2})\,}, leftmargin=\dimexpr\labelwidth+5mm\relax]
+    \item[(\problem{1})] The classifier is very uncertain about which relation is expressed and outputs a uniform distribution over relations (Figure~\ref{fig:fitb:problem 1});
+    \item[(\problem{2})] All sentences are classified as conveying the same relation (Figure~\ref{fig:fitb:problem 2}).
+\end{description}
+In both cases, the entity predictor can minimize \loss{ep} quite well by ignoring the output of the classifier and simply exploiting entity co-occurrences.
+More precisely, many entities appear in only one relationship with a single other entity. In this case, the entity predictor can easily ignore the relation \(r\) and predict the missing entity; this pressure is even stronger at the beginning of the optimization process, since the classifier's output is not yet reliable.
+
+This instability problem is particularly prevalent since the two components (classifier and entity predictor) are strongly interdependent: the classifier cannot be trained without a good entity predictor, which itself cannot take \(r\) into account without a good classifier, resulting in a bootstrapping problem.
+To overcome these pitfalls, we developed two additional losses, which we now describe.
+
+\paragraph{Skewness}
+Firstly, to encourage the classifier to be confident in its output, we minimize the entropy of the predicted relation distribution.
+This addresses \problem{1} by forcing the classifier toward outputting one-hot vectors for a given sentence, using the following loss:
+\begin{equation}
+    \loss{s}(\vctr{\phi}) = \expectation_{(\rndm{s}, \rndmvctr{e})\sim \uniformDistribution(\dataSet)} \left[ \entropy(\rndm{R} \mid \rndm{s}, \rndmvctr{e}; \vctr{\phi}) \right],
+    \label{eq:fitb:skewness}
+\end{equation}
+where \(\rndm{R}\) is the random variable corresponding to the predicted relation.
+Following our first independence hypothesis, the entropy of Equation~\ref{eq:fitb:skewness} is equivalent to \(\entropy(\rndm{R}\mid \rndm{s})\).
+
+\paragraph{Distribution Distance}
+Secondly, to ensure that the classifier predicts several relations, we enforce \hypothesis{uniform} by minimizing the Kullback--Leibler divergence between the model prior distribution over relations \(P(\rndm{R}\mid\vctr{\phi})\) and the uniform distribution%
+\sidenote[][15mm]{
+    Other distributions could be used, but in the absence of further information, the uniform distribution is a reasonable default.
+    See Section~\ref{sec:fitb:conclusion} for a discussion of alternatives.
+}
+over the set of relations \(\uniformDistribution(\relationSet)\), that is:
+\begin{equation}
+    \loss{d}(\vctr{\phi}) = \kl(P(\rndm{R}\mid\vctr{\phi}) \mathrel{\|} \uniformDistribution(\relationSet)).
+    \label{eq:fitb:uniformity}
+\end{equation}
+Note that, contrary to \loss{s}, the loss \loss{d} measures the unconditional distribution over \(\rndm{R}\), i.e.~the distribution of predicted relations over all sentences, which requires a good approximation of \(P(\rndm{R}\mid\vctr{\phi})\).
+This addresses \problem{2} by forcing the classifier toward predicting each class equally often over a set of sentences.
+
+To train the entity predictor and the classifier jointly and satisfactorily, we use the two losses at the same time, resulting in the final loss:
+\begin{equation}
+    \symcal{L}(\vctr{\theta}, \vctr{\phi}) = \loss{ep}(\vctr{\theta}, \vctr{\phi}) + \alpha \loss{s}(\vctr{\phi}) + \beta \loss{d}(\vctr{\phi}),
+    \label{eq:fitb:fullloss}
+\end{equation}
+where \(\alpha\) and \(\beta\) are both positive hyperparameters.
+
+All three losses are defined over the real data distribution, but in practice, they are approximated at the level of a mini-batch.
+First, both \loss{ep} and \loss{s} can be computed for each sample independently.
+To optimize \loss{d}, however, we need to estimate \(P(\rndm{R})\) at the mini-batch level and maximize the entropy of the mean predicted relation distribution.
+Formally, let \(s_i\) for \(i=1,\dotsc,B\) be the \(i\)-th sentence in a batch of size \(B\); we then approximate \loss{d} as:
+\begin{equation*}
+    \sum_{r\in\relationSet} \left( \sum\limits_{i=1}^B \frac{\operatorname{\textsc{pcnn}}(r; s_i)}{B} \right) \log \left( \sum\limits_{i=1}^B \frac{\operatorname{\textsc{pcnn}}(r; s_i)}{B} \right).
+\end{equation*}
+
+\paragraph{Learning}
+We optimize the empirical estimation of Equation~\ref{eq:fitb:fullloss}, learning the \textsc{pcnn} parameters and word embeddings \(\vctr{\phi}\) as well as the entity predictor parameters and entity embeddings \(\vctr{\theta}\) jointly.
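+
+The following sketch illustrates how the two RelDist losses can be computed on a mini-batch of classifier outputs; again, the use of PyTorch, the function name \texttt{reldist\_losses} and the \texttt{eps} smoothing constant are assumptions made for the example, not the exact implementation used in our experiments.
+\begin{verbatim}
+import torch
+
+def reldist_losses(p, eps=1e-12):
+    """RelDist losses for a mini-batch.
+
+    p: tensor of shape (B, |R|); row i is the distribution PCNN(. ; s_i).
+    Returns (L_s, L_d).
+    """
+    # Skewness: entropy of each predicted distribution, averaged over the batch.
+    l_s = -(p * (p + eps).log()).sum(dim=1).mean()
+
+    # Distribution distance: negative entropy of the batch-average distribution,
+    # equal to KL(mean distribution || uniform) up to the constant log|R|.
+    mean_p = p.mean(dim=0)
+    l_d = (mean_p * (mean_p + eps).log()).sum()
+    return l_s, l_d
+
+# The full objective is then: loss = l_ep + alpha * l_s + beta * l_d,
+# with l_ep the entity prediction loss and alpha, beta > 0.
+\end{verbatim}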
diff --git a/mainmatter/fitb/problem 1.tex b/mainmatter/fitb/problem 1.tex @@ -0,0 +1,15 @@ +\begin{tikzpicture} + \node[anchor=south west] at (0, 5) {\IfLanguageName{french}{Distribution dégénérée}{Degenerate distributions}:}; + \drawDistribution{activation}{\(P(\rndm{r}\mid s_1) = \)}{4.5}{0/0.32, 1/0.35, 2/0.31, 3/0.37, 4/0.38, 5/0.36, 6/0.34, 7/0.36, 8/0.36, 9/0.31} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_2) = \)}{4}{0/0.31, 1/0.37, 2/0.38, 3/0.35, 4/0.32, 5/0.33, 6/0.37, 7/0.36, 8/0.32, 9/0.35} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_3) = \)}{3.5}{0/0.32, 1/0.35, 2/0.31, 3/0.38, 4/0.32, 5/0.33, 6/0.37, 7/0.33, 8/0.32, 9/0.33} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = \)}{3}{0/0.33, 1/0.31, 2/0.34, 3/0.36, 4/0.34, 5/0.33, 6/0.37, 7/0.35, 8/0.36, 9/0.32} + \node at (0.75, 2.665) {\(\vdots\)}; + + \node[anchor=south west] at (0, 2) {\IfLanguageName{french}{Distribution désirée}{Desired distributions}:}; + \drawDistribution{activation}{\(P(\rndm{r}\mid s_1) = \)}{1.5}{0/0.02, 1/0.05, 2/0.01, 3/0.07, 4/0.88, 5/0.06, 6/0.04, 7/0.06, 8/0.06, 9/0.01} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_2) = \)}{1}{0/0.01, 1/0.07, 2/0.88, 3/0.05, 4/0.02, 5/0.03, 6/0.07, 7/0.06, 8/0.02, 9/0.05} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_3) = \)}{0.5}{0/0.02, 1/0.05, 2/0.01, 3/0.88, 4/0.02, 5/0.03, 6/0.07, 7/0.03, 8/0.02, 9/0.03} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = \)}{0}{0/0.03, 1/0.01, 2/0.04, 3/0.06, 4/0.04, 5/0.03, 6/0.87, 7/0.05, 8/0.06, 9/0.02} + \node at (0.75, -0.335) {\(\vdots\)}; +\end{tikzpicture}% diff --git a/mainmatter/fitb/problem 2.tex b/mainmatter/fitb/problem 2.tex @@ -0,0 +1,17 @@ +\begin{tikzpicture} + \node[anchor=south west] at (0, 6) {Degenerate distributions:}; + \drawDistribution{activation}{\(P(\rndm{r}\mid s_1) = \)}{5.5}{0/0.02, 1/0.05, 2/0.01, 3/0.87, 4/0.08, 5/0.06, 6/0.04, 7/0.06, 8/0.06, 9/0.01} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_2) = \)}{5}{0/0.01, 1/0.07, 2/0.08, 3/0.85, 4/0.02, 5/0.03, 6/0.07, 7/0.06, 8/0.02, 9/0.05} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_3) = \)}{4.5}{0/0.02, 1/0.05, 2/0.01, 3/0.88, 4/0.02, 5/0.03, 6/0.07, 7/0.03, 8/0.02, 9/0.03} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = \)}{4}{0/0.03, 1/0.01, 2/0.04, 3/0.86, 4/0.04, 5/0.03, 6/0.07, 7/0.05, 8/0.06, 9/0.02} + \node at (0.75, 3.665) {\(\vdots\)}; + \drawDistribution{mean activation}{\(\text{average} = \)}{3}{0/0.02, 1/0.045, 2/0.035, 3/0.865, 4/0.04, 5/0.0375, 6/0.0625, 7/0.05, 8/0.04, 9/0.0275} + + \node[anchor=south west] at (0, 2.5) {Desired distributions:}; + \drawDistribution{activation}{\(P(\rndm{r}\mid s_1) = \)}{2}{0/0.02, 1/0.05, 2/0.01, 3/0.07, 4/0.08, 5/0.06, 6/0.04, 7/0.86, 8/0.06, 9/0.01} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_2) = \)}{1.5}{0/0.01, 1/0.07, 2/0.08, 3/0.05, 4/0.82, 5/0.03, 6/0.07, 7/0.06, 8/0.02, 9/0.05} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_3) = \)}{1}{0/0.02, 1/0.05, 2/0.01, 3/0.08, 4/0.02, 5/0.83, 6/0.07, 7/0.03, 8/0.02, 9/0.03} + \drawDistribution{activation}{\(P(\rndm{r}\mid s_4) = \)}{0.5}{0/0.83, 1/0.01, 2/0.04, 3/0.06, 4/0.04, 5/0.03, 6/0.07, 7/0.05, 8/0.06, 9/0.02} + \node at (0.75, 0.165) {\(\vdots\)}; + \drawDistribution{mean activation}{\(\text{average} = \)}{-0.5}{0/0.35, 1/0.32, 2/0.31, 3/0.37, 4/0.38, 5/0.36, 6/0.34, 7/0.36, 8/0.36, 9/0.31} +\end{tikzpicture}% diff --git a/mainmatter/fitb/quantitative.tex b/mainmatter/fitb/quantitative.tex @@ -0,0 +1,34 @@ +\begin{tabular}[b]{c c c r r r r r r r} + \toprule + 
\multirow{2}{*}{Dataset} & \multicolumn{2}{c}{Model} & \multicolumn{3}{c}{\bcubed} & \multicolumn{3}{c}{V-measure} & \multirow{2}{*}{\textsc{ari}} \\ + \cmidrule(lr){2-3}\cmidrule(lr){4-6}\cmidrule(lr){7-9} + & Classifier & Reg. & \fone & Prec. & Rec. & \fone & Hom. & Comp. & \\ + \midrule + \multirow{8}{*}{\nytfb} & rel-\textsc{lda} & & 29.1 & 24.8 & 35.2 & 30.0 & 26.1 & 35.1 & 13.3 \\ + & rel-\textsc{lda}1 & & 36.9 & 30.4 & 47.0 & 37.4 & 31.9 & 45.1 & 24.2 \\ + & Linear & \loss{vae reg} & 35.2 & 23.8 & 67.1 & 27.0 & 18.6 & 49.6 & 18.7 \\ + & \textsc{pcnn} & \loss{vae reg} & 27.6 & 24.3 & 31.9 & 24.7 & 21.2 & 29.6 & 15.7 \\ + & Linear & \(\loss{s}+\loss{d}\) & 37.5 & 31.1 & 47.4 & \strong{38.7} & 32.6 & 47.8 & 27.6 \\ + & \textsc{pcnn} & \(\loss{s}+\loss{d}\) & \strong{39.4} & 32.2 & 50.7 & 38.3 & 32.2 & 47.2 & \strong{33.8} \\ + & \bertcoder\(^\dagger\) & \(\loss{s}+\loss{d}\) & 41.5 & 34.6 & 51.8 & 39.9 & 33.9 & 48.5 & 35.1 \\ + & \bertcoder\(^\dagger\) & Self\textsc{ore}\(^\dagger\) & \emph{49.1} & 47.3 & 51.1 & \emph{46.6} & 45.7 & 47.6 & \emph{40.3} \\ + \midrule + \multirow{8}{*}{\trexspo} & rel-\textsc{lda} & & 11.9 & 10.2 & 14.1 & 5.9 & 4.9 & 7.4 & 3.9 \\ + & rel-\textsc{lda}1 & & 18.5 & 14.3 & 26.1 & 19.4 & 16.1 & 24.5 & 8.6 \\ + & Linear & \loss{vae reg} & 24.8 & 20.6 & 31.3 & 23.6 & 19.1 & 30.6 & 12.6 \\ + & \textsc{pcnn} & \loss{vae reg} & 25.3 & 19.2 & 37.0 & 23.1 & 18.1 & 31.9 & 10.8 \\ + & Linear & \(\loss{s}+\loss{d}\) & 29.5 & 22.7 & 42.0 & 34.8 & 28.4 & 45.1 & 20.3 \\ + & \textsc{pcnn} & \(\loss{s}+\loss{d}\) & \strong{36.3} & 28.4 & 50.3 & \emph{\strong{41.4}} & 33.7 & 53.6 & \strong{21.3} \\ + & \bertcoder\(^\dagger\) & \(\loss{s}+\loss{d}\) & 38.1 & 30.7 & 50.3 & 39.1 & 37.6 & 40.8 & 23.5 \\ + & \bertcoder\(^\dagger\) & Self\textsc{ore}\(^\dagger\) & \emph{41.0} & 39.4 & 42.8 & \emph{41.4} & 40.3 & 42.5 & \emph{33.7} \\ + \midrule + \multirow{8}{*}{\trexds} & rel-\textsc{lda} & & 9.7 & 6.8 & 17.0 & 8.3 & 6.6 & 11.4 & 2.2 \\ + & rel-\textsc{lda}1 & & 12.7 & 8.3 & 26.6 & 17.0 & 13.3 & 23.5 & 3.4 \\ + & Linear & \loss{vae reg} & 9.0 & 6.4 & 15.5 & 5.7 & 4.5 & 7.9 & 1.9 \\ + & \textsc{pcnn} & \loss{vae reg} & 12.2 & 8.6 & 21.1 & 12.9 & 10.1 & 18.0 & 2.9 \\ + & Linear & \(\loss{s}+\loss{d}\) & 19.5 & 13.3 & 36.7 & \strong{30.6} & 24.1 & 42.1 & \strong{11.5} \\ + & \textsc{pcnn} & \(\loss{s}+\loss{d}\) & \strong{19.7} & 14.0 & 33.4 & 26.6 & 20.8 & 36.8 & 9.4 \\ + & \bertcoder\(^\dagger\) & \(\loss{s}+\loss{d}\) & 22.4 & 17.6 & 30.8 & 31.2 & 26.3 & 38.3 & 12.3 \\ + & \bertcoder\(^\dagger\) & Self\textsc{ore}\(^\dagger\) & \emph{32.9} & 29.7 & 36.8 & \emph{32.4} & 30.1 & 35.1 & \emph{20.1} \\ + \bottomrule +\end{tabular} diff --git a/mainmatter/fitb/related works.tex b/mainmatter/fitb/related works.tex @@ -0,0 +1,48 @@ +\section{Related Work} +\label{sec:fitb:related work} +The \textsc{nlp} and knowledge base related work is presented in Chapter~\ref{chap:context}, and the relation extraction related work is presented in Chapter~\ref{chap:relation extraction}. 
+The main approaches we built upon are:
+\begin{itemize}
+    \item Distant supervision (Section~\ref{sec:relation extraction:distant supervision}, \cite{distant}): the method we use to obtain a supervised dataset for evaluation;%
+        \sidenote{As explained in Section~\ref{sec:relation extraction:clustering}, this is sadly standard in the evaluation of clustering approaches.}
+    \item \textsc{pcnn} (Section~\ref{sec:relation extraction:pcnn}, \cite{pcnn}): our relation classifier, which was the state-of-the-art supervised relation extraction method at the time;
+    \item Rel-\textsc{lda} (Section~\ref{sec:relation extraction:rellda}, \cite{rellda}): the state-of-the-art generative model we compare to;
+    \item \textsc{vae} for relation extraction (Section~\ref{sec:relation extraction:vae}, \cite{vae_re}): the overall inspiration for the architecture of our model, with which we share the entity predictor;
+    \item Self\textsc{ore} (Section~\ref{sec:relation extraction:selfore}, \cite{selfore}): an extension of our work which, alongside its own approach, proposed an improvement of our relation classifier by replacing the \textsc{pcnn} with a \bertcoder{}.
+\end{itemize}
+In this section, we give further details about the relationship between our losses and the ones derived by \textcite{vae_re}.
+As a reminder, their model is a \textsc{vae} defined from an encoder \(Q(r\mid \vctr{e}, s; \vctr{\phi})\) and a decoder \(P(\vctr{e}\mid r, s; \vctr{\theta})\) as:
+\begin{marginparagraph}
+    The prior of a conditional \textsc{vae} \(P(r\mid\vctr{\theta})\) is usually conditioned on \(s\) too.
+    However, this additional variable is not used by \textcite{vae_re}.
+\end{marginparagraph}
+\begin{equation}
+    \loss{vae}(\vctr{\theta}, \vctr{\phi}) = \expectation_{Q(r\mid \vctr{e}, s; \vctr{\phi})}[ - \log P(\vctr{e}\mid r, s; \vctr{\theta})] + \beta \kl(Q(r\mid \vctr{e}, s; \vctr{\phi}) \mathrel{\|} P(r\mid\vctr{\theta}))
+    \label{eq:fitb:vae full loss}
+\end{equation}
+This is simply a rewriting of the \textsc{elbo} of Equation~\ref{eq:relation extraction:elbo}, substituting the relation extraction variables for the generic ones.
+There are, however, two differences compared to a standard \textsc{vae}.
+First, the variable \(s\) is not reconstructed; it simply conditions the whole process.
+Second, the regularization term is weighted by a hyperparameter \(\beta\).
+This makes the model of \textcite{vae_re} a conditional \(\beta\)\textsc{-vae} \parencitex{conditional_vae, beta_vae}[-11mm].
+The first summand of Equation~\ref{eq:fitb:vae full loss} is called the reconstruction loss since it reconstructs the input variable \(\vctr{e}\) from the latent variable \(r\) and the conditional variable \(s\).
+Since we followed the same structure for our model, this reconstruction loss is actually \loss{ep}, the difference being in the relation classifier.
+We can then rewrite the loss of \textcite{vae_re} as:
+\begin{marginparagraph}
+    As explained in Section~\ref{sec:relation extraction:vae}, \(Q\) is the \textsc{vae}'s encoder.
+\end{marginparagraph}
+\begin{align*}
+    \loss{vae}(\vctr{\theta}, \vctr{\phi}) & = \loss{ep}(\vctr{\theta}, \vctr{\phi}) + \beta \loss{vae reg}(\vctr{\theta}, \vctr{\phi}) \\
+    \loss{vae reg}(\vctr{\theta}, \vctr{\phi}) & = \kl(Q(\rndm{r}\mid \rndmvctr{e}, \rndm{s}; \vctr{\phi}) \mathrel{\|} P(\rndm{r}\mid\vctr{\theta}))
+\end{align*}
+In their work, they select the prior as a uniform distribution over all relations \(P(\rndm{r}\mid\vctr{\theta}) = \uniformDistribution(\relationSet)\) and approximate \loss{vae reg} as follows:
+\begin{equation*}
+    \loss{vae reg}(\vctr{\phi}) = \expectation_{(\rndm{s}, \rndmvctr{e})\sim \uniformDistribution(\dataSet)} \left[ - \entropy(\rndm{R} \mid \rndm{s}, \rndmvctr{e}; \vctr{\phi}) \right]
+\end{equation*}
+Its purpose is to prevent the classifier from always predicting the same relation, i.e.~it serves the same purpose as our distribution distance loss \loss{d}.
+However, its expression is equivalent to \(-\loss{s}\), and indeed, minimizing the opposite of our skewness loss increases the entropy of the classifier output, addressing \problem{2} (classifier always outputting the same relation).
+Yet, using \(\loss{vae reg}=-\loss{s}\) alone draws the classifier into the other pitfall \problem{1} (not predicting any relation confidently).
+In a traditional \textsc{vae}, \problem{1} is addressed by the reconstruction loss \loss{ep}.
+However, at the beginning of training, the supervision signal is so weak that we cannot rely on \loss{ep} for our task.
+The \(\beta\) weighting can be decreased to avoid \problem{1}, but this would also weaken the solution to \problem{2}.
+This causes a drop in performance, as we show experimentally.
diff --git a/mainmatter/fitb/variants.tex b/mainmatter/fitb/variants.tex
@@ -0,0 +1,108 @@
+\section{Alternative Models}
+\label{sec:fitb:variants}
+In this section, we present some variations we considered during the development of our model.
+However, we did not manage to obtain satisfactory results with these variants.
+When possible, we provide an analysis of why we think these variants did not work, keeping in mind that negative results are difficult to certify: poor results might be improved by a better hyperparameter search.
+
+\paragraph{\textmd{\textsc{lstm}} Relation Classifier}
+Instead of a \textsc{pcnn}, we tried using a deep \textsc{lstm} (Section~\ref{sec:context:lstm}) for our relation classifier.
+We never managed to obtain any results with it; the training always collapsed into one of \problem{1} or \problem{2}.
+An \textsc{lstm} is considerably harder to train than a \textsc{cnn}.
+The representation provided by an \textsc{lstm} is the result of several non-linear operator compositions, through which it is hard to backpropagate information.
+On the other hand, with good initialization, the representation extracted by a \textsc{cnn} can be close to its input embeddings (which are pre-trained).
+Since the training of the entity predictor heavily depends on the relation classifier, it is not surprising that the training fails with an \textsc{lstm}.
+The failure of the \textsc{lstm} to provide a good representation at the beginning of the training procedure pushes the entity predictor to ignore the relation variable \(r\), which therefore does not receive any gradient and thus does not provide any supervision back to the \textsc{lstm}.
+In retrospect, pre-training the sentence representation extractor with a language modeling loss could have overcome this problem.
+The initial representation would have been good enough for the entity predictor to provide some gradient back to the relation classifier.
+This is confirmed by the work of \textcite{selfore}, who trained a \textsc{bert} relation classifier with our losses.
+In the end, what made the \textsc{pcnn} work was its shallowness and the pre-trained GloVe word embeddings.
+
+\paragraph{Gumbel--Softmax}
+Another approach to tackling \problem{1} (uniform output) would be to use a discrete distribution for the relation \(r\); instead of marginalizing over all possible relations in Equation~\ref{eq:fitb:entity prediction loss}, we would only take the most likely relation.
+However, taking the maximum would not be differentiable.
+The Gumbel--Softmax technique provides a solution to this problem.
+Let us denote by \(y_r\in\symbb{R}\), for \(r\in\relationSet\), the unnormalized score assigned to each relation by the \textsc{pcnn}.
+It can be shown \parencite{gumbel_max} that sampling from \(\softmax(\vctr{y})\) is equivalent to taking \(\argmax_{r\in\relationSet} (y_r + \rndm{G}_r)\), where the \(\rndm{G}_r\) are independently sampled from the Gumbel distribution.
+Knowing this, \textcitex{gumbel_softmax} propose to use the following Gumbel--Softmax distribution:
+\begin{equation*}
+    \pi_r = \frac{\exp((y_r+\rndm{G}_r)\divslash\tau)}{\sum_{r'\in\relationSet}\exp((y_{r'}+\rndm{G}_{r'})\divslash\tau)}
+\end{equation*}
+This distribution has the advantage of being differentiable with respect to the scores \(\vctr{y}\), the only source of randomness being the Gumbel variables \(\rndm{G}_r\).
+Furthermore, when the temperature \(\tau>0\) is close to 1, this distribution looks like a standard softmax output.
+On the other hand, when the temperature is close to 0, this distribution is closer to a one-hot vector with low entropy.
+By gradually decreasing the temperature throughout the training process, this technique should help us solve \problem{1}.
+
+\begin{table}[t]
+    \centering
+    \input{mainmatter/fitb/gumbel.tex}%
+    \scaption*[Quantitative results of the Gumbel--Softmax model on the \nytfb{} dataset.]{
+        Quantitative results of the Gumbel--Softmax model on the \nytfb{} dataset.
+        The \loss{s} solution is used together with \loss{d} and a softmax activation, while the Gumbel--Softmax activation is used with \loss{d} only.
+        Therefore, the first row reports the same results presented in Table~\ref{tab:fitb:quantitative}.
+        \label{tab:fitb:gumbel}
+    }
+\end{table}
+
+Following a grid search, we initially set \(\tau=1\) with an annealing rate of 0.9 per epoch.
+Table~\ref{tab:fitb:gumbel} compares the best Gumbel--Softmax results of \(\loss{ep}+\loss{d}\) with the standard softmax result of \(\loss{ep}+\loss{s}+\loss{d}\) discussed above.
+We do not use \loss{s} with Gumbel--Softmax since both mechanisms seek to address \problem{1}.
+While the Gumbel--Softmax prevents the model from falling entirely into \problem{1}, it still underperforms compared to the \loss{s} regularization of our standard model.
+
+\paragraph{Aligning Sentences and Entity Pairs}
+Another model we attempted to train aims to align sentences and entity pairs.
+It recombines our \textsc{pcnn} relation classifier with the energy function \(\psi\) into a new layout following a relaxation of the \hypothesis{pullback} assumption.%
+\sidenote{This hypothesis, introduced in Section~\refAssumptionSection{pullback}, assumes that the relation can be found from the entities alone and from the sentence alone.}
+In this model, we obtain a distribution over the relations \(P(\rndm{r}_s\mid\operatorname{blanked}(s))\) using a \textsc{pcnn} as described in Section~\ref{sec:fitb:classifier}, but we also extract a distribution \(P(\rndm{r}_e\mid\vctr{e})\) using the energy function \(\psi\) normalized over the relations: \(P(r_e\mid e_1, e_2)\propto \exp(\psi(e_1, r_e, e_2))\).
+This model clearly assumes \hypothesis{pullback} since it extracts a relation from the entities and from the sentence separately.
+However, in contrast to other models assuming \hypothesis{pullback} (such as \textsc{dipre}, Section~\ref{sec:relation extraction:dipre}), we combine the separate relations into a single one to express the fact that a relation is conveyed by both the sentence and the entities:
+\begin{equation}
+    P(\rndm{r}=r\mid s, \vctr{e}; \vctr{\theta}, \vctr{\phi}) = P(\rndm{r}_s=r\mid s; \vctr{\phi}) P(\rndm{r}_e=r\mid \vctr{e}; \vctr{\theta})
+    \label{eq:fitb:align product}
+\end{equation}
+For the final prediction \(\rndm{r}\), the assumption \hypothesis{pullback} is not made, since it depends on both the sentence and the entities.
+However, Equation~\ref{eq:fitb:align product} clearly assumes that \(\rndm{r}_s\) and \(\rndm{r}_e\) are independent, and that \(\rndm{r}\) does not capture any interaction between \(s\) and \(\vctr{e}\).
+To train this model, we force the two distributions to align by minimizing the following loss:
+\begin{marginparagraph}[-4cm]
+    For numerical stability, the first term of Equation~\ref{eq:fitb:align loss} needs to be computed as:
+    \begin{multline*}
+        - \log \sum_{r\in\relationSet}P(r\mid s, \vctr{e}; \vctr{\theta}, \vctr{\phi}) = \\
+        \shoveright{- \log \sum_{r\in\relationSet} \exp (y_r^{(s)} + y_r^{(e)})} \\
+        \shoveright{+ \log \sum_{r\in\relationSet} \exp (y_r^{(s)})} \\
+        + \log \sum_{r\in\relationSet} \exp (y_r^{(e)})
+    \end{multline*}
+    where \(\vctr{y}^{(s)}\) and \(\vctr{y}^{(e)}\) are the logits used for predicting \(\rndm{r}_s\) and \(\rndm{r}_e\) respectively.
+\end{marginparagraph}
+\begin{marginparagraph}[2cm]
+    We also attempted (without success) to align the two distributions by minimizing \(\jsd(\rndm{r}_s \mathrel{\|} \rndm{r}_e)\), where \(\jsd\) is the Jensen--Shannon divergence defined as:
+    \begin{align*}
+        \jsd(\rndm{r}_s \mathrel{\|} \rndm{r}_e) = \frac{1}{2} \big(
+            & \kl(\rndm{r}_s \mathrel{\|} \rndm{m}) \\
+            & + \kl(\rndm{r}_e \mathrel{\|} \rndm{m}) \big)
+    \end{align*}
+    with \(\displaystyle P(\rndm{m}) = \frac{1}{2} \big( P(\rndm{r}_s) + P(\rndm{r}_e) \big)\).
+\end{marginparagraph}
+\begin{equation}
+    \loss{align}(\vctr{\theta}, \vctr{\phi}) = - \log \sum_{r\in\relationSet}P(r\mid s, \vctr{e}; \vctr{\theta}, \vctr{\phi}) + \loss{d}(\vctr{\theta}) + \loss{d}(\vctr{\phi}).
+    \label{eq:fitb:align loss}
+\end{equation}
+Here, \loss{s} is not needed since, in order to maximize the summed pointwise product of the two probability mass functions, each distribution must be deterministic and concentrated on a matching relation, which solves \problem{1}.
+
+Table~\ref{tab:fitb:align} gives the results on the \nytfb{} dataset and compares them to the fill-in-the-blank model of Section~\ref{sec:fitb:model}.
+The main problem with this model is its lack of stability.
+The average, maximum and minimum given in Table~\ref{tab:fitb:align} are computed over eight runs.
+Similar results were observed with slightly different setups, such as enforcing \loss{d} on the product (\(\rndm{r}\)) instead of on each distribution separately (\(\rndm{r}_s\) and \(\rndm{r}_e\)).
+As we can see, the alignment model sometimes reaches excellent performance relative to the fill-in-the-blank model.
+However, this happens rarely, and on average, it performs worse according to the \bcubed{} and \textsc{ari} metrics.
+Its good V-measure scores are nevertheless encouraging.
+
+\begin{table}[t]
+    \centering
+    \input{mainmatter/fitb/align.tex}
+    \scaption[Quantitative results of the alignment models on the \nytfb{} dataset.]{
+        Quantitative results of the alignment model on the \nytfb{} dataset.
+        The first row reports the same results presented in Table~\ref{tab:fitb:quantitative}.
+        Eight alignment models were trained; the average scores are given in the second row, while the third and fourth rows report the best and worst models among the eight.
+        \label{tab:fitb:align}
+    }
+\end{table}
diff --git a/mainmatter/graph/3-path.tex b/mainmatter/graph/3-path.tex
@@ -0,0 +1,9 @@
+\begin{tikzpicture}
+    \node (e1) {\(\rndm{e}_1\)};
+    \node[right=of e1] (e2) {\(\rndm{e}_2\)};
+    \node[right=of e2] (e3) {\(\rndm{e}_3\)};
+    \node[right=of e3] (e4) {\(\rndm{e}_4\rlap{,}\)};
+    \draw[arrow] (e1) to node[midway,above] {\(\rndm{r}_1\)} (e2);
+    \draw[arrow] (e2) to node[midway,above] {\(\rndm{r}_2\)} (e3);
+    \draw[arrow] (e3) to node[midway,above] {\(\rndm{r}_3\)} (e4);
+\end{tikzpicture}
diff --git a/mainmatter/graph/T-REx degrees.xml b/mainmatter/graph/T-REx degrees.xml
@@ -0,0 +1,4168 @@
+<degrees>
+<m>19392185</m>
+<outdegrees>
+<value><degree>5</degree><frequency>0.006950635010959312</frequency></value>
+<value><degree>280</degree><frequency>7.735074722110994e-07</frequency></value>
+<value><degree>38</degree><frequency>6.301507540279756e-05</frequency></value>
+<value><degree>57</degree><frequency>2.4442836121870742e-05</frequency></value>
+<value><degree>23</degree><frequency>0.0002114769429025146</frequency></value>
+<value><degree>25</degree><frequency>0.0001677995543049945</frequency></value>
+<value><degree>921</degree><frequency>1.031343296281466e-07</frequency></value>
+<value><degree>520</degree><frequency>1.5470149444221988e-07</frequency></value>
+<value><degree>67</degree><frequency>1.6140522586804943e-05</frequency></value>
+<value><degree>105</degree><frequency>6.239626942502869e-06</frequency></value>
+<value><degree>24</degree><frequency>0.00019440821134905634</frequency></value>
+<value><degree>29</degree><frequency>0.00011860447907236859</frequency></value>
+<value><degree>18</degree><frequency>0.00039871731834241475</frequency></value>
+<value><degree>42</degree><frequency>5.043268718816369e-05</frequency></value>
+<value><degree>502</degree><frequency>1.5470149444221988e-07</frequency></value>
+<value><degree>64</degree><frequency>2.160664205709671e-05</frequency></value>
+<value><degree>4268</degree><frequency>5.15671648140733e-08</frequency></value>
+<value><degree>172</degree><frequency>2.9908955592162514e-06</frequency></value>
+<value><degree>8324</degree><frequency>5.15671648140733e-08</frequency></value>
+<value><degree>10</degree><frequency>0.001669383826525995</frequency></value>
+<value><degree>4166</degree><frequency>5.15671648140733e-08</frequency></value>
+<value><degree>116</degree><frequency>5.414552305477696e-06</frequency></value>
+<value><degree>135</degree><frequency>3.6612687017992042e-06</frequency></value> +<value><degree>34</degree><frequency>8.286843385621579e-05</frequency></value> +<value><degree>28</degree><frequency>0.00012948515084813806</frequency></value> +<value><degree>37</degree><frequency>6.265410524909905e-05</frequency></value> +<value><degree>30</degree><frequency>0.00011195231481135313</frequency></value> +<value><degree>21</degree><frequency>0.0002658803017813619</frequency></value> +<value><degree>297</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>115</degree><frequency>5.31141797584955e-06</frequency></value> +<value><degree>52</degree><frequency>2.970268693290622e-05</frequency></value> +<value><degree>19</degree><frequency>0.0003358053772692453</frequency></value> +<value><degree>32</degree><frequency>9.426477728012598e-05</frequency></value> +<value><degree>31</degree><frequency>9.94730609263474e-05</frequency></value> +<value><degree>14</degree><frequency>0.0007463315763540829</frequency></value> +<value><degree>46</degree><frequency>4.207880648828381e-05</frequency></value> +<value><degree>40</degree><frequency>5.486746336217399e-05</frequency></value> +<value><degree>20</degree><frequency>0.0003044009738974747</frequency></value> +<value><degree>419</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>288</degree><frequency>1.031343296281466e-06</frequency></value> +<value><degree>189</degree><frequency>2.1658209221910783e-06</frequency></value> +<value><degree>76</degree><frequency>1.3252761357216838e-05</frequency></value> +<value><degree>11</degree><frequency>0.00129877061300725</frequency></value> +<value><degree>361</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>44</degree><frequency>4.506970204750006e-05</frequency></value> +<value><degree>158</degree><frequency>2.7330597351458848e-06</frequency></value> +<value><degree>5576</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>9600</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1135</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>77</degree><frequency>1.2788656873890178e-05</frequency></value> +<value><degree>9</degree><frequency>0.0020798068912812042</frequency></value> +<value><degree>4248</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>26</degree><frequency>0.00015495933026629026</frequency></value> +<value><degree>48</degree><frequency>3.666425418280612e-05</frequency></value> +<value><degree>33</degree><frequency>8.255903086733135e-05</frequency></value> +<value><degree>1642</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>58</degree><frequency>2.531947792370999e-05</frequency></value> +<value><degree>59</degree><frequency>2.1503507727468566e-05</frequency></value> +<value><degree>6</degree><frequency>0.005150476854464827</frequency></value> +<value><degree>205</degree><frequency>1.1344776259096125e-06</frequency></value> +<value><degree>119</degree><frequency>4.795746327708816e-06</frequency></value> +<value><degree>216</degree><frequency>1.237611955537759e-06</frequency></value> +<value><degree>313</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>138</degree><frequency>4.280074679568084e-06</frequency></value> +<value><degree>50</degree><frequency>3.460156759024318e-05</frequency></value> +<value><degree>236</degree><frequency>1.1344776259096125e-06</frequency></value> 
+<value><degree>146</degree><frequency>3.1455970536584713e-06</frequency></value> +<value><degree>73</degree><frequency>1.4799776301639037e-05</frequency></value> +<value><degree>7</degree><frequency>0.0035526166855359516</frequency></value> +<value><degree>112</degree><frequency>7.013134414713969e-06</frequency></value> +<value><degree>68</degree><frequency>1.5831119597920503e-05</frequency></value> +<value><degree>70</degree><frequency>1.5521716609036064e-05</frequency></value> +<value><degree>300</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>227</degree><frequency>1.4438806147940523e-06</frequency></value> +<value><degree>285</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>16</degree><frequency>0.0005444976932718</frequency></value> +<value><degree>61</degree><frequency>2.1864477881167078e-05</frequency></value> +<value><degree>12</degree><frequency>0.0010900267298398815</frequency></value> +<value><degree>35</degree><frequency>7.343164269524038e-05</frequency></value> +<value><degree>191</degree><frequency>1.031343296281466e-06</frequency></value> +<value><degree>684</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>766</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>299</degree><frequency>8.250746370251728e-07</frequency></value> +<value><degree>13</degree><frequency>0.0008729289659726327</frequency></value> +<value><degree>795</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1844</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>39</degree><frequency>5.863186639360134e-05</frequency></value> +<value><degree>671</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>343</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>1</degree><frequency>0.04526741055739722</frequency></value> +<value><degree>36</degree><frequency>7.157522476193373e-05</frequency></value> +<value><degree>95</degree><frequency>7.889776216553215e-06</frequency></value> +<value><degree>43</degree><frequency>4.393522442159045e-05</frequency></value> +<value><degree>4</degree><frequency>0.011424241260074613</frequency></value> +<value><degree>214</degree><frequency>1.392313449979979e-06</frequency></value> +<value><degree>49</degree><frequency>3.568447805133872e-05</frequency></value> +<value><degree>17</degree><frequency>0.00044636537863061845</frequency></value> +<value><degree>5545</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1048</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>302</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>118</degree><frequency>4.38320900919623e-06</frequency></value> +<value><degree>406</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>914</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>232</degree><frequency>1.392313449979979e-06</frequency></value> +<value><degree>75</degree><frequency>1.3046492697960545e-05</frequency></value> +<value><degree>111</degree><frequency>5.569253799919916e-06</frequency></value> +<value><degree>85</degree><frequency>1.046813445725688e-05</frequency></value> +<value><degree>322</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>4773</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>155</degree><frequency>3.5581343721710575e-06</frequency></value> 
+<value><degree>222</degree><frequency>1.031343296281466e-06</frequency></value> +<value><degree>2141</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3</degree><frequency>0.02029595942901741</frequency></value> +<value><degree>81</degree><frequency>1.1396343423910199e-05</frequency></value> +<value><degree>8</degree><frequency>0.0028023144374911854</frequency></value> +<value><degree>499</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>392</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>98</degree><frequency>7.374104568412481e-06</frequency></value> +<value><degree>56</degree><frequency>2.6763358538504043e-05</frequency></value> +<value><degree>2</degree><frequency>0.03364979242926983</frequency></value> +<value><degree>93</degree><frequency>8.3023135350658e-06</frequency></value> +<value><degree>130</degree><frequency>4.38320900919623e-06</frequency></value> +<value><degree>2009</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>572</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>121</degree><frequency>4.280074679568084e-06</frequency></value> +<value><degree>1029</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>47</degree><frequency>3.955201541239422e-05</frequency></value> +<value><degree>78</degree><frequency>1.2169850896121298e-05</frequency></value> +<value><degree>475</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>3816</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>113</degree><frequency>5.105149316593256e-06</frequency></value> +<value><degree>278</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>841</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>27</degree><frequency>0.00014026268829427937</frequency></value> +<value><degree>394</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>359</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>65</degree><frequency>1.7275000212714554e-05</frequency></value> +<value><degree>87</degree><frequency>9.643059820231707e-06</frequency></value> +<value><degree>22</degree><frequency>0.0002385497044299031</frequency></value> +<value><degree>3122</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1492</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5273</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>632</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1361</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>243</degree><frequency>1.1344776259096125e-06</frequency></value> +<value><degree>411</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>244</degree><frequency>1.3407462851659058e-06</frequency></value> +<value><degree>367</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>134</degree><frequency>3.7644030314273506e-06</frequency></value> +<value><degree>291</degree><frequency>1.031343296281466e-06</frequency></value> +<value><degree>209</degree><frequency>1.5470149444221989e-06</frequency></value> +<value><degree>1067</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>219</degree><frequency>1.8048507684925653e-06</frequency></value> 
+<value><degree>107</degree><frequency>5.4661194702917696e-06</frequency></value> +<value><degree>114</degree><frequency>6.703731425829529e-06</frequency></value> +<value><degree>217</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>3513</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>559</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1406</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>887</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2980</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1916</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>2217</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>122</degree><frequency>4.589477668452524e-06</frequency></value> +<value><degree>224</degree><frequency>1.1344776259096125e-06</frequency></value> +<value><degree>1363</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3484</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>195</degree><frequency>2.114253757377005e-06</frequency></value> +<value><degree>11348</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2652</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>207</degree><frequency>1.4954477796081257e-06</frequency></value> +<value><degree>11751</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2890</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1532</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>4507</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>445</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>2414</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>145</degree><frequency>3.7128358666132774e-06</frequency></value> +<value><degree>167</degree><frequency>2.784626899959958e-06</frequency></value> +<value><degree>15</degree><frequency>0.0006263347838317343</frequency></value> +<value><degree>103</degree><frequency>6.806865755457675e-06</frequency></value> +<value><degree>329</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>306</degree><frequency>1.2891791203518324e-06</frequency></value> +<value><degree>55</degree><frequency>2.6969627197760334e-05</frequency></value> +<value><degree>184</degree><frequency>2.1658209221910783e-06</frequency></value> +<value><degree>455</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>197</degree><frequency>1.7532836036784922e-06</frequency></value> +<value><degree>66</degree><frequency>1.722343304790048e-05</frequency></value> +<value><degree>90</degree><frequency>1.0365000127628732e-05</frequency></value> +<value><degree>220</degree><frequency>1.7017164388644188e-06</frequency></value> +<value><degree>150</degree><frequency>2.3205224166332982e-06</frequency></value> +<value><degree>539</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>2595</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>864</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>148</degree><frequency>3.1971642184725445e-06</frequency></value> +<value><degree>129</degree><frequency>4.280074679568084e-06</frequency></value> 
+<value><degree>86</degree><frequency>9.849328479488e-06</frequency></value> +<value><degree>451</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>1262</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>761</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>231</degree><frequency>1.186044790723686e-06</frequency></value> +<value><degree>9613</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>226</degree><frequency>1.0829104610955392e-06</frequency></value> +<value><degree>274</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>2938</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>386</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>53</degree><frequency>2.7485298845901068e-05</frequency></value> +<value><degree>51</degree><frequency>3.0424627240303244e-05</frequency></value> +<value><degree>181</degree><frequency>2.217388087005152e-06</frequency></value> +<value><degree>98712</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>552</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>218</degree><frequency>1.186044790723686e-06</frequency></value> +<value><degree>1120</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2618</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>147</degree><frequency>3.1455970536584713e-06</frequency></value> +<value><degree>19994</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>17122</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8580</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>531</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1334</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>240</degree><frequency>1.3407462851659058e-06</frequency></value> +<value><degree>672</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>2383</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3636</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>157</degree><frequency>3.2487313832866177e-06</frequency></value> +<value><degree>890</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>268</degree><frequency>1.3407462851659058e-06</frequency></value> +<value><degree>9203</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>846</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3768</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1233</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>949</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>69</degree><frequency>1.629522408124716e-05</frequency></value> +<value><degree>10583</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>267</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>279</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>272</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>447</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>151</degree><frequency>3.0424627240303246e-06</frequency></value> 
+<value><degree>175</degree><frequency>2.062686592562932e-06</frequency></value> +<value><degree>6235</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>649</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>415</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>2625</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>54</degree><frequency>2.9290149614393634e-05</frequency></value> +<value><degree>229</degree><frequency>1.186044790723686e-06</frequency></value> +<value><degree>1065</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1524</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>14938</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1040</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>228</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>435</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>234</degree><frequency>1.237611955537759e-06</frequency></value> +<value><degree>645</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>247</degree><frequency>1.1344776259096125e-06</frequency></value> +<value><degree>45</degree><frequency>4.17178363345853e-05</frequency></value> +<value><degree>1319</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>80</degree><frequency>1.2530821049819811e-05</frequency></value> +<value><degree>1922</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>41</degree><frequency>5.032955285853554e-05</frequency></value> +<value><degree>170</degree><frequency>1.7532836036784922e-06</frequency></value> +<value><degree>433</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>71</degree><frequency>1.3407462851659058e-05</frequency></value> +<value><degree>507</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>412</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>1103</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1508</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>102</degree><frequency>7.270970238784335e-06</frequency></value> +<value><degree>245</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>971</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1207</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3310</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>235</degree><frequency>1.5470149444221989e-06</frequency></value> +<value><degree>717</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>699</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>254</degree><frequency>8.250746370251728e-07</frequency></value> +<value><degree>9035</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>616</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>362</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>187</degree><frequency>1.8048507684925653e-06</frequency></value> +<value><degree>331</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>729</degree><frequency>1.5470149444221988e-07</frequency></value> 
+<value><degree>100</degree><frequency>7.528806062854701e-06</frequency></value> +<value><degree>241</degree><frequency>1.237611955537759e-06</frequency></value> +<value><degree>239</degree><frequency>1.186044790723686e-06</frequency></value> +<value><degree>233</degree><frequency>1.4954477796081257e-06</frequency></value> +<value><degree>601</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>525</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>791</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>26609</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>128</degree><frequency>3.9191045258695705e-06</frequency></value> +<value><degree>4330</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>89</degree><frequency>9.38522399616134e-06</frequency></value> +<value><degree>315</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>152</degree><frequency>2.8877612295881047e-06</frequency></value> +<value><degree>2237</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>283</degree><frequency>9.282089666533194e-07</frequency></value> +<value><degree>137</degree><frequency>3.6612687017992042e-06</frequency></value> +<value><degree>110</degree><frequency>5.6208209647339895e-06</frequency></value> +<value><degree>97</degree><frequency>7.838209051739141e-06</frequency></value> +<value><degree>208</degree><frequency>1.7532836036784922e-06</frequency></value> +<value><degree>183</degree><frequency>2.1658209221910783e-06</frequency></value> +<value><degree>573</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>120</degree><frequency>4.795746327708816e-06</frequency></value> +<value><degree>99</degree><frequency>6.652164261015455e-06</frequency></value> +<value><degree>5307</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>901</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>142</degree><frequency>3.3518657129147644e-06</frequency></value> +<value><degree>194</degree><frequency>1.8564179333066387e-06</frequency></value> +<value><degree>180</degree><frequency>2.011119427748859e-06</frequency></value> +<value><degree>101</degree><frequency>7.477238898040628e-06</frequency></value> +<value><degree>441</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>326</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>13989</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2360</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>437</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>198</degree><frequency>1.7532836036784922e-06</frequency></value> +<value><degree>556</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>1527</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5742</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>62</degree><frequency>1.985335845341822e-05</frequency></value> +<value><degree>24028</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6067</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>4381</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>84</degree><frequency>1.1138507599839832e-05</frequency></value> 
+<value><degree>550</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>162</degree><frequency>2.423656746261445e-06</frequency></value> +<value><degree>109</degree><frequency>6.394328436945089e-06</frequency></value> +<value><degree>3657</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>131</degree><frequency>4.022238855497718e-06</frequency></value> +<value><degree>213</degree><frequency>1.4954477796081257e-06</frequency></value> +<value><degree>12856</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>485</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>892</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>880</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>512</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>72</degree><frequency>1.5418582279407915e-05</frequency></value> +<value><degree>88</degree><frequency>1.0880671775769466e-05</frequency></value> +<value><degree>173</degree><frequency>2.372089581447372e-06</frequency></value> +<value><degree>804</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>417</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>286</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>154</degree><frequency>2.784626899959958e-06</frequency></value> +<value><degree>133</degree><frequency>3.3518657129147644e-06</frequency></value> +<value><degree>517</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2229</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>504</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>125</degree><frequency>4.950447822151036e-06</frequency></value> +<value><degree>363</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>178</degree><frequency>2.578358240703665e-06</frequency></value> +<value><degree>1200</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>273</degree><frequency>1.1344776259096125e-06</frequency></value> +<value><degree>505</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>987</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1095</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>253</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>6382</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>143</degree><frequency>3.3002985481006913e-06</frequency></value> +<value><degree>316</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>585</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>202</degree><frequency>1.392313449979979e-06</frequency></value> +<value><degree>352</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>773</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>906</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>510</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>96</degree><frequency>9.643059820231707e-06</frequency></value> +<value><degree>413</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>797</degree><frequency>1.5470149444221988e-07</frequency></value> 
+<value><degree>491</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>3293</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>878</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>312</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>323</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>221</degree><frequency>2.062686592562932e-06</frequency></value> +<value><degree>3928</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>60</degree><frequency>2.294738834226262e-05</frequency></value> +<value><degree>282</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>188</degree><frequency>1.8048507684925653e-06</frequency></value> +<value><degree>524</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>124</degree><frequency>4.795746327708816e-06</frequency></value> +<value><degree>74</degree><frequency>1.4284104653498304e-05</frequency></value> +<value><degree>13487</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>182</degree><frequency>2.062686592562932e-06</frequency></value> +<value><degree>91</degree><frequency>9.797761314673927e-06</frequency></value> +<value><degree>166</degree><frequency>2.939328394402178e-06</frequency></value> +<value><degree>319</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>2269</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2448</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>570</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>927</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>136</degree><frequency>3.867537361055498e-06</frequency></value> +<value><degree>644</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>348</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>123</degree><frequency>4.280074679568084e-06</frequency></value> +<value><degree>149</degree><frequency>2.6814925703318116e-06</frequency></value> +<value><degree>377</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>79</degree><frequency>1.1757313577608711e-05</frequency></value> +<value><degree>1556</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>13218</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>869</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1836</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>296</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>824</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>130460</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>63</degree><frequency>1.8667313662694535e-05</frequency></value> +<value><degree>2775</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3894</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>318</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>399</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>537</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>48800</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>667</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>424</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>82</degree><frequency>1.1447910588724272e-05</frequency></value> +<value><degree>94</degree><frequency>8.92111951283468e-06</frequency></value> +<value><degree>1055</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>153</degree><frequency>2.3205224166332982e-06</frequency></value> +<value><degree>250</degree><frequency>1.237611955537759e-06</frequency></value> +<value><degree>5648</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>106</degree><frequency>6.033358283246576e-06</frequency></value> +<value><degree>4271</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6691</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>366</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>196</degree><frequency>1.8048507684925653e-06</frequency></value> +<value><degree>201</degree><frequency>1.4438806147940523e-06</frequency></value> +<value><degree>3663</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>610</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>117</degree><frequency>5.259850811035476e-06</frequency></value> +<value><degree>2206</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>536</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>83</degree><frequency>1.1035373270211686e-05</frequency></value> +<value><degree>4865</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1405</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>3579</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2547</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1659</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1344</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>4021</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>547</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>168</degree><frequency>3.0940298888443978e-06</frequency></value> +<value><degree>176</degree><frequency>2.475223911075518e-06</frequency></value> +<value><degree>1057</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>4267</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4488</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2766</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>284</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>1679</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3476</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4891</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3609</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3867</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5828</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>701</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>478</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>3626</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>687</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>1025</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>801</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2496</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6416</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1996</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>604</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>199</degree><frequency>1.8564179333066387e-06</frequency></value> +<value><degree>357</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>317</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>3944</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>823</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>381</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>384</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>721</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1618</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>946</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>200</degree><frequency>1.2891791203518324e-06</frequency></value> +<value><degree>12643</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>12797</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>104</degree><frequency>5.981791118432503e-06</frequency></value> +<value><degree>298</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>372</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>4279</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>459</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>161</degree><frequency>2.3205224166332982e-06</frequency></value> +<value><degree>3097</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>739</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>257</degree><frequency>1.186044790723686e-06</frequency></value> +<value><degree>3752</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5301</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2363</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1141</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>442</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1238</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>192</degree><frequency>1.8048507684925653e-06</frequency></value> +<value><degree>960</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3011</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>495</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>5176</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>945</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2476</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>5741</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>10859</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1143</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>410</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>328</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>265</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>210</degree><frequency>2.268955251819225e-06</frequency></value> +<value><degree>954</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>7823</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1018</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>452</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>4981</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>574</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>3741</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>877</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>727</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>5202</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8172</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>9445</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>814</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>873</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>543</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>788</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>4326</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>521</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>636</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>17230</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6817</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3215</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>92</degree><frequency>9.436791160975413e-06</frequency></value> +<value><degree>713</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>3564</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>127</degree><frequency>4.1769403499399376e-06</frequency></value> +<value><degree>20456</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4027</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>139</degree><frequency>3.1455970536584713e-06</frequency></value> +<value><degree>2753</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3436</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>741</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>108</degree><frequency>5.981791118432503e-06</frequency></value> +<value><degree>822</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>251</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>425</degree><frequency>5.15671648140733e-07</frequency></value> 
+<value><degree>630</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>2367</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1818</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2506</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2653</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8832</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>775</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>258</degree><frequency>1.2891791203518324e-06</frequency></value> +<value><degree>977</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3208</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4410</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1449</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1384</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>255</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>498</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>269</degree><frequency>9.282089666533194e-07</frequency></value> +<value><degree>3820</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>140</degree><frequency>2.939328394402178e-06</frequency></value> +<value><degree>12576</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1899</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2644</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>382</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>768</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>360</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>648</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>9368</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>12052</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>13615</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2604</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2070</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>303</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>132</degree><frequency>4.589477668452524e-06</frequency></value> +<value><degree>358</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>5758</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>345</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>5015</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1026</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1100</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>9057</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>912</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>5764</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4926</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>294</degree><frequency>5.672388129548063e-07</frequency></value> 
+<value><degree>165</degree><frequency>2.3205224166332982e-06</frequency></value> +<value><degree>1607</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1158</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>376</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>160</degree><frequency>2.1658209221910783e-06</frequency></value> +<value><degree>476</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>321</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>538</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>14161</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>11947</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>9041</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6237</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4461</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>398</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>2885</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3515</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>806</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1176</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1183</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5347</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>796</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>647</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>401</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>709</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>994</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>970</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>6507</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1609</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>259</degree><frequency>1.031343296281466e-06</frequency></value> +<value><degree>144</degree><frequency>3.970671690683644e-06</frequency></value> +<value><degree>159</degree><frequency>2.5267910758895917e-06</frequency></value> +<value><degree>2874</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>828</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>264</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>174</degree><frequency>2.217388087005152e-06</frequency></value> +<value><degree>2032</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>440</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>340</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>163</degree><frequency>2.5267910758895917e-06</frequency></value> +<value><degree>1168</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>715</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1440</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1008</degree><frequency>1.031343296281466e-07</frequency></value> 
+<value><degree>961</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>581</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1360</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>777</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>928</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2025</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1086</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>623</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1035</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>608</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>983</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>691</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>7629</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3590</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3521</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>204</degree><frequency>1.392313449979979e-06</frequency></value> +<value><degree>2935</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>225</degree><frequency>1.7017164388644188e-06</frequency></value> +<value><degree>141</degree><frequency>2.9908955592162514e-06</frequency></value> +<value><degree>4875</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>156</degree><frequency>2.8877612295881047e-06</frequency></value> +<value><degree>1159</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>307</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>5961</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1039</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1411</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>15969</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>665</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>609</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>336</degree><frequency>8.250746370251728e-07</frequency></value> +<value><degree>261</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>2205</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>179</degree><frequency>2.3205224166332982e-06</frequency></value> +<value><degree>3648</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1957</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>708</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1302</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>996</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3363</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>237</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>1127</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>293</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>1157</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>446</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>3446</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>561</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2416</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2811</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>722</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>295</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>342</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>762</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>4534</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>893</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>956</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>325</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>925</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>448</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>494</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>731</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2063</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>683</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1702</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>346</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1349</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1620</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>385</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>185</degree><frequency>1.8564179333066387e-06</frequency></value> +<value><degree>126</degree><frequency>4.84731349252289e-06</frequency></value> +<value><degree>4992</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>563</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>490</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>405</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>371</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>2110</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>444</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>246</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>1078</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>351</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>2742</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2848</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>569</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>1109</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1684</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2394</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1482</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>1094</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1014</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3101</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2051</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1024</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1535</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>277</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>190</degree><frequency>2.011119427748859e-06</frequency></value> +<value><degree>965</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>266</degree><frequency>1.031343296281466e-06</frequency></value> +<value><degree>203</degree><frequency>1.186044790723686e-06</frequency></value> +<value><degree>474</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>657</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>334</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>391</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>426</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>2082</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>482</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>270</degree><frequency>1.392313449979979e-06</frequency></value> +<value><degree>1298</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>666</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>369</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>468</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>1292</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1004</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2151</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>223</degree><frequency>1.2891791203518324e-06</frequency></value> +<value><degree>374</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>754</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>252</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>1355</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>933</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1098</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1326</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1576</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>838</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>260</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>519</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2174</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>540</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>429</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>839</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1144</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>344</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>211</degree><frequency>1.8048507684925653e-06</frequency></value> +<value><degree>571</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>436</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>3180</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>842</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>2690</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1531</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2389</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>465</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>389</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>206</degree><frequency>1.0829104610955392e-06</frequency></value> +<value><degree>324</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>834</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>423</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>470</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>404</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>486</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>7128</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1223</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>387</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>781</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>501</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1277</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>356</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>407</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>1817</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>479</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>551</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>304</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>19727</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>784</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3959</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>305</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>1755</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>171</degree><frequency>2.011119427748859e-06</frequency></value> +<value><degree>481</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>1076</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>617</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>656</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>2336</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1669</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>497</degree><frequency>3.0940298888443977e-07</frequency></value> 
+<value><degree>4982</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>292</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>705</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1472</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>460</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>787</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>730</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>281</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>177</degree><frequency>1.8564179333066387e-06</frequency></value> +<value><degree>1185</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3256</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>364</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>402</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>620</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>496</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>693</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>689</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>752</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>926</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1937</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>724</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>776</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>169</degree><frequency>2.268955251819225e-06</frequency></value> +<value><degree>249</degree><frequency>9.282089666533194e-07</frequency></value> +<value><degree>338</degree><frequency>8.250746370251728e-07</frequency></value> +<value><degree>1119</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>765</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1515</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>428</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>1431</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>506</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>1264</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2598</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>607</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>242</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>311</degree><frequency>9.282089666533194e-07</frequency></value> +<value><degree>5321</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1390</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4553</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3799</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>542</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>769</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>612</degree><frequency>2.578358240703665e-07</frequency></value> 
+<value><degree>694</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>275</degree><frequency>9.282089666533194e-07</frequency></value> +<value><degree>422</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>24140</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>53801</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>383</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>514</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>2530</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2400</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>794</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1038</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>860</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>309</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>985</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5665</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1458</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1213</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>568</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>164</degree><frequency>1.8048507684925653e-06</frequency></value> +<value><degree>1585</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>308</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>790</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>193</degree><frequency>1.3407462851659058e-06</frequency></value> +<value><degree>368</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>403</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>339</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>332</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>473</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>598</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2083</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>431</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>789</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>337</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>1418</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1243</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>845</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>533</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1683</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>480</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>728</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>903</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>969</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>263</degree><frequency>6.188059777688795e-07</frequency></value> 
+<value><degree>395</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>535</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>215</degree><frequency>1.1344776259096125e-06</frequency></value> +<value><degree>354</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>1408</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>679</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1134</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1423</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1587</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>697</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>349</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>577</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>230</degree><frequency>1.186044790723686e-06</frequency></value> +<value><degree>1204</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>11549</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>7541</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>9271</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6869</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8536</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6426</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1917</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>714</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>952</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1177</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>248</degree><frequency>1.1344776259096125e-06</frequency></value> +<value><degree>546</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1203</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3342</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>578</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>583</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>720</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>614</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>457</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1402</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1434</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1546</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>290</degree><frequency>8.250746370251728e-07</frequency></value> +<value><degree>560</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1724</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>541</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>7158</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>981</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>327</degree><frequency>4.125373185125864e-07</frequency></value> 
+<value><degree>370</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>555</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1608</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>526</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2062</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>874</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>704</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>397</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>289</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>3052</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>19782</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>562</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1181</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>567</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>335</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>621</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>782</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>472</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1236</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>558</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1763</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>47899</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1022</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1234</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1396</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>897</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>483</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>4419</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1017</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>175110</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1315</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>493</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>373</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>347</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>931</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>747</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1036</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1307</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>314</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>711</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1260</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>527</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>640</degree><frequency>1.5470149444221988e-07</frequency></value> 
+[… remainder of the out-degree histogram elided: a long run of <value><degree>…</degree><frequency>…</frequency></value> entries, with degrees ranging from a few hundred up to 62759 and frequencies that are integer multiples of 5.15671648140733e-08 …]
+</outdegrees>
+<indegrees>
+[… in-degree histogram begins, in the same <value><degree>…</degree><frequency>…</frequency></value> format (e.g. degree 22 with frequency 8.895335930427644e-05, degree 36 with frequency 3.717992583094685e-05), and continues below …]
+<value><degree>1064</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>997</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4979</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4095</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>260</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>92</degree><frequency>6.394328436945089e-06</frequency></value> +<value><degree>45</degree><frequency>2.7485298845901068e-05</frequency></value> +<value><degree>382</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>1292</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>624</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>171</degree><frequency>2.217388087005152e-06</frequency></value> +<value><degree>178</degree><frequency>2.114253757377005e-06</frequency></value> +<value><degree>2008</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>94</degree><frequency>6.961567249899895e-06</frequency></value> +<value><degree>1822</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5744</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>516</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>230</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>48137</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>52066</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4655</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>197</degree><frequency>1.3407462851659058e-06</frequency></value> +<value><degree>60</degree><frequency>1.4335671818312377e-05</frequency></value> +<value><degree>4150</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>30</degree><frequency>5.399082156033474e-05</frequency></value> +<value><degree>1600</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>310</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>153</degree><frequency>2.5267910758895917e-06</frequency></value> +<value><degree>183</degree><frequency>1.5470149444221989e-06</frequency></value> +<value><degree>17</degree><frequency>0.0001330432852203091</frequency></value> +<value><degree>262</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>287</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>37</degree><frequency>3.857223928092683e-05</frequency></value> +<value><degree>88</degree><frequency>8.560149359136168e-06</frequency></value> +<value><degree>32</degree><frequency>5.007171703446517e-05</frequency></value> +<value><degree>1276</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>129</degree><frequency>3.7128358666132774e-06</frequency></value> +<value><degree>132</degree><frequency>3.7644030314273506e-06</frequency></value> +<value><degree>60566</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>473</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>242</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>248</degree><frequency>1.1344776259096125e-06</frequency></value> +<value><degree>1893</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>1736</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>72</degree><frequency>1.1293209094282052e-05</frequency></value> +<value><degree>505</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>18</degree><frequency>0.00012004835968716264</frequency></value> +<value><degree>22768</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>25</degree><frequency>7.35863441896826e-05</frequency></value> +<value><degree>24</degree><frequency>7.760858304518031e-05</frequency></value> +<value><degree>71</degree><frequency>1.0983806105397612e-05</frequency></value> +<value><degree>3169</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>7</degree><frequency>0.0004762227670579669</frequency></value> +<value><degree>9</degree><frequency>0.00033250507872114465</frequency></value> +<value><degree>16324</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>930</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>106</degree><frequency>5.105149316593256e-06</frequency></value> +<value><degree>383</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>91</degree><frequency>6.291194107316943e-06</frequency></value> +<value><degree>3049</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>48</degree><frequency>2.3978731638544084e-05</frequency></value> +<value><degree>15</degree><frequency>0.0001599097780884413</frequency></value> +<value><degree>1695</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5036</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1812</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>855</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1269</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>56</degree><frequency>1.8615746497880462e-05</frequency></value> +<value><degree>5215</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>103</degree><frequency>6.394328436945089e-06</frequency></value> +<value><degree>1453</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>62</degree><frequency>1.4026268829427937e-05</frequency></value> +<value><degree>508</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>414</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>10410</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>605</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>28</degree><frequency>6.0591418656536125e-05</frequency></value> +<value><degree>1810</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>134</degree><frequency>2.8877612295881047e-06</frequency></value> +<value><degree>464</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>133</degree><frequency>4.1769403499399376e-06</frequency></value> +<value><degree>13671</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>336</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>95</degree><frequency>6.652164261015455e-06</frequency></value> +<value><degree>14199</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>122</degree><frequency>4.434776174010304e-06</frequency></value> 
+<value><degree>75</degree><frequency>1.0571268786885027e-05</frequency></value> +<value><degree>617</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>534</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1291</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>146</degree><frequency>2.5267910758895917e-06</frequency></value> +<value><degree>332</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>23690</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>396</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>147</degree><frequency>2.6814925703318116e-06</frequency></value> +<value><degree>23</degree><frequency>8.163082190067803e-05</frequency></value> +<value><degree>161</degree><frequency>2.5267910758895917e-06</frequency></value> +<value><degree>145</degree><frequency>2.578358240703665e-06</frequency></value> +<value><degree>10812</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>9852</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1682</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6</degree><frequency>0.000607100231356085</frequency></value> +<value><degree>12</degree><frequency>0.0002207074654042337</frequency></value> +<value><degree>37863</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>321</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>128</degree><frequency>4.22850751475401e-06</frequency></value> +<value><degree>26</degree><frequency>6.786238889532046e-05</frequency></value> +<value><degree>759</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>16</degree><frequency>0.00014423335998496302</frequency></value> +<value><degree>796</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>47</degree><frequency>2.6041418231107014e-05</frequency></value> +<value><degree>504</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>7638</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>980</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1708</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>42</degree><frequency>2.8104104823669947e-05</frequency></value> +<value><degree>4</degree><frequency>0.001046246206912733</frequency></value> +<value><degree>283</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>7894</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>20</degree><frequency>0.00010365000127628733</frequency></value> +<value><degree>2932</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>80</degree><frequency>1.0261865798000587e-05</frequency></value> +<value><degree>420</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>188</degree><frequency>1.3407462851659058e-06</frequency></value> +<value><degree>357</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>805</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>21</degree><frequency>9.343970264310082e-05</frequency></value> +<value><degree>31</degree><frequency>5.249537378072662e-05</frequency></value> +<value><degree>215</degree><frequency>9.282089666533194e-07</frequency></value> 
+<value><degree>4414</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>598</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1215</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>48492</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>333</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>138</degree><frequency>3.3518657129147644e-06</frequency></value> +<value><degree>30130</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>582</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>538</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>847</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>372</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>297</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>53</degree><frequency>2.0369030101558952e-05</frequency></value> +<value><degree>83</degree><frequency>9.333656831347267e-06</frequency></value> +<value><degree>10</degree><frequency>0.0002948610484068711</frequency></value> +<value><degree>49</degree><frequency>2.3256791331147056e-05</frequency></value> +<value><degree>11227</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>123</degree><frequency>3.0940298888443978e-06</frequency></value> +<value><degree>258</degree><frequency>9.282089666533194e-07</frequency></value> +<value><degree>552</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>690</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>385</degree><frequency>8.250746370251728e-07</frequency></value> +<value><degree>1731</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>7799</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>29</degree><frequency>5.713641861399322e-05</frequency></value> +<value><degree>181</degree><frequency>1.4438806147940523e-06</frequency></value> +<value><degree>424</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>33</degree><frequency>4.5379105036384504e-05</frequency></value> +<value><degree>111</degree><frequency>5.569253799919916e-06</frequency></value> +<value><degree>169</degree><frequency>1.9595522629347852e-06</frequency></value> +<value><degree>1583</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>100</degree><frequency>5.930223953618429e-06</frequency></value> +<value><degree>115</degree><frequency>3.970671690683644e-06</frequency></value> +<value><degree>5</degree><frequency>0.000741535830026374</frequency></value> +<value><degree>1454</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>34</degree><frequency>4.687455281599263e-05</frequency></value> +<value><degree>883</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1</degree><frequency>0.0033396958620186433</frequency></value> +<value><degree>158</degree><frequency>2.1658209221910783e-06</frequency></value> +<value><degree>2</degree><frequency>0.002290561893876322</frequency></value> +<value><degree>491</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>136</degree><frequency>2.062686592562932e-06</frequency></value> +<value><degree>547</degree><frequency>2.062686592562932e-07</frequency></value> 
+<value><degree>1826</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>112</degree><frequency>4.69261199808067e-06</frequency></value> +<value><degree>3</degree><frequency>0.00140004852470209</frequency></value> +<value><degree>38</degree><frequency>3.676738851243426e-05</frequency></value> +<value><degree>2467</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2757</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>7431</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>210</degree><frequency>1.7017164388644188e-06</frequency></value> +<value><degree>251</degree><frequency>9.282089666533194e-07</frequency></value> +<value><degree>3190</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>355</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>7721</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>84</degree><frequency>8.663283688764313e-06</frequency></value> +<value><degree>50</degree><frequency>2.000805994786044e-05</frequency></value> +<value><degree>59287</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>13</degree><frequency>0.00019698656958976</frequency></value> +<value><degree>2222</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>174</degree><frequency>1.6501492740503456e-06</frequency></value> +<value><degree>185</degree><frequency>2.372089581447372e-06</frequency></value> +<value><degree>1361</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>579</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>52</degree><frequency>2.098783607932783e-05</frequency></value> +<value><degree>78</degree><frequency>8.61171652395024e-06</frequency></value> +<value><degree>316</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>265</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>220</degree><frequency>1.4438806147940523e-06</frequency></value> +<value><degree>139</degree><frequency>3.0940298888443978e-06</frequency></value> +<value><degree>90</degree><frequency>7.992910546181361e-06</frequency></value> +<value><degree>1016</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>354</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>738</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>1467</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8</degree><frequency>0.00040836037816264646</frequency></value> +<value><degree>182</degree><frequency>1.8048507684925653e-06</frequency></value> +<value><degree>192</degree><frequency>1.9595522629347852e-06</frequency></value> +<value><degree>337</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>93</degree><frequency>7.941343381367288e-06</frequency></value> +<value><degree>768</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>4724</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>11</degree><frequency>0.00024984291352418513</frequency></value> +<value><degree>35</degree><frequency>4.331641844382157e-05</frequency></value> +<value><degree>86</degree><frequency>7.786641886925068e-06</frequency></value> +<value><degree>127</degree><frequency>3.5581343721710575e-06</frequency></value> 
+<value><degree>307</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>1090</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>797</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>228</degree><frequency>1.5985821092362723e-06</frequency></value> +<value><degree>137</degree><frequency>2.939328394402178e-06</frequency></value> +<value><degree>515</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>57</degree><frequency>1.7481268871970848e-05</frequency></value> +<value><degree>135</degree><frequency>3.6097015369851307e-06</frequency></value> +<value><degree>105</degree><frequency>6.394328436945089e-06</frequency></value> +<value><degree>8440</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>14616</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>338</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>752</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>67</degree><frequency>1.2943358368332398e-05</frequency></value> +<value><degree>2061</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>717</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>85</degree><frequency>8.096044875809508e-06</frequency></value> +<value><degree>284</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>1224</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1499</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>300</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>232</degree><frequency>1.2891791203518324e-06</frequency></value> +<value><degree>822</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>211</degree><frequency>1.8048507684925653e-06</frequency></value> +<value><degree>151</degree><frequency>3.1455970536584713e-06</frequency></value> +<value><degree>82</degree><frequency>7.477238898040628e-06</frequency></value> +<value><degree>484</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>517</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>640</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>356</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>373</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>257</degree><frequency>1.0829104610955392e-06</frequency></value> +<value><degree>40</degree><frequency>3.238417950323803e-05</frequency></value> +<value><degree>74</degree><frequency>1.046813445725688e-05</frequency></value> +<value><degree>1247</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>311</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>207</degree><frequency>1.5470149444221989e-06</frequency></value> +<value><degree>3166</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>119</degree><frequency>4.53791050363845e-06</frequency></value> +<value><degree>204</degree><frequency>1.7017164388644188e-06</frequency></value> +<value><degree>485</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>1156</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1252</degree><frequency>1.031343296281466e-07</frequency></value> 
+<value><degree>551</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>116</degree><frequency>4.950447822151036e-06</frequency></value> +<value><degree>125</degree><frequency>4.69261199808067e-06</frequency></value> +<value><degree>641</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>43</degree><frequency>2.702119436257441e-05</frequency></value> +<value><degree>59</degree><frequency>1.5573283773850136e-05</frequency></value> +<value><degree>285</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>77</degree><frequency>9.178955336905047e-06</frequency></value> +<value><degree>1607</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>453</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>362</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>14</degree><frequency>0.0001788349275752062</frequency></value> +<value><degree>222</degree><frequency>1.3407462851659058e-06</frequency></value> +<value><degree>1211</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>315</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>2999</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1036</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>432</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>711</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>64</degree><frequency>1.376843300535757e-05</frequency></value> +<value><degree>109</degree><frequency>4.950447822151036e-06</frequency></value> +<value><degree>160</degree><frequency>2.114253757377005e-06</frequency></value> +<value><degree>164</degree><frequency>2.062686592562932e-06</frequency></value> +<value><degree>221</degree><frequency>9.282089666533194e-07</frequency></value> +<value><degree>41</degree><frequency>3.424059743654467e-05</frequency></value> +<value><degree>275</degree><frequency>1.031343296281466e-06</frequency></value> +<value><degree>672</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>70</degree><frequency>1.0829104610955392e-05</frequency></value> +<value><degree>335</degree><frequency>8.250746370251728e-07</frequency></value> +<value><degree>61</degree><frequency>1.5109179290523477e-05</frequency></value> +<value><degree>1982</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>126</degree><frequency>4.53791050363845e-06</frequency></value> +<value><degree>532</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>177</degree><frequency>2.423656746261445e-06</frequency></value> +<value><degree>1358</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>99</degree><frequency>5.517686635105843e-06</frequency></value> +<value><degree>19</degree><frequency>0.00011298365810763459</frequency></value> +<value><degree>762</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>180</degree><frequency>1.4954477796081257e-06</frequency></value> +<value><degree>1911</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1136</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6788</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>76</degree><frequency>9.178955336905047e-06</frequency></value> 
+<value><degree>2093</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>402</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>349</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>726</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>44</degree><frequency>2.7072761527388482e-05</frequency></value> +<value><degree>288</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>65</degree><frequency>1.2427686720191665e-05</frequency></value> +<value><degree>4538</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>353</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>693</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>474</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>97</degree><frequency>7.064701579528042e-06</frequency></value> +<value><degree>120</degree><frequency>4.07380602031179e-06</frequency></value> +<value><degree>130</degree><frequency>3.5581343721710575e-06</frequency></value> +<value><degree>46</degree><frequency>2.5525746582966283e-05</frequency></value> +<value><degree>3591</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>157</degree><frequency>2.062686592562932e-06</frequency></value> +<value><degree>10400</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>81</degree><frequency>8.560149359136168e-06</frequency></value> +<value><degree>179</degree><frequency>2.475223911075518e-06</frequency></value> +<value><degree>113</degree><frequency>5.362985140663623e-06</frequency></value> +<value><degree>68</degree><frequency>1.299492553314647e-05</frequency></value> +<value><degree>278</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>2713</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>18516</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>148</degree><frequency>2.939328394402178e-06</frequency></value> +<value><degree>914</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>301</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>295</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>844</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>175</degree><frequency>1.907985098120712e-06</frequency></value> +<value><degree>282</degree><frequency>8.250746370251728e-07</frequency></value> +<value><degree>303</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>345</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>789</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>585</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>5882</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>792</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>226</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>63</degree><frequency>1.438723898312645e-05</frequency></value> +<value><degree>8083</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>324</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>999</degree><frequency>1.5470149444221988e-07</frequency></value> 
+<value><degree>384</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>155</degree><frequency>2.7330597351458848e-06</frequency></value> +<value><degree>124</degree><frequency>2.939328394402178e-06</frequency></value> +<value><degree>69</degree><frequency>1.2788656873890178e-05</frequency></value> +<value><degree>955</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>352</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>820</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>202</degree><frequency>1.3407462851659058e-06</frequency></value> +<value><degree>170</degree><frequency>2.114253757377005e-06</frequency></value> +<value><degree>58</degree><frequency>1.670776139975975e-05</frequency></value> +<value><degree>619</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1724</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>27</degree><frequency>6.435582168796348e-05</frequency></value> +<value><degree>259</degree><frequency>1.1344776259096125e-06</frequency></value> +<value><degree>1575</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>695</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>511</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>676</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1495</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3952</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>296</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>143</degree><frequency>2.6814925703318116e-06</frequency></value> +<value><degree>831</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>732</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>187</degree><frequency>2.3205224166332982e-06</frequency></value> +<value><degree>168</degree><frequency>2.1658209221910783e-06</frequency></value> +<value><degree>254</degree><frequency>8.766418018392461e-07</frequency></value> +<value><degree>342</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>1547</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>367</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>87</degree><frequency>7.425671733226555e-06</frequency></value> +<value><degree>612</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>3446</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>166</degree><frequency>2.1658209221910783e-06</frequency></value> +<value><degree>6257</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8015</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2128</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>250</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>246</degree><frequency>1.031343296281466e-06</frequency></value> +<value><degree>39</degree><frequency>3.4910970579127623e-05</frequency></value> +<value><degree>540</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>292</degree><frequency>9.797761314673926e-07</frequency></value> +<value><degree>267</degree><frequency>9.282089666533194e-07</frequency></value> 
+<value><degree>107</degree><frequency>5.15671648140733e-06</frequency></value> +<value><degree>935</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>290</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>32239</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2985</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>570</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>639</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>785</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>108</degree><frequency>4.331641844382157e-06</frequency></value> +<value><degree>526</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>5053</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>438</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>446</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1103</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1095</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>320</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>114</degree><frequency>4.38320900919623e-06</frequency></value> +<value><degree>66</degree><frequency>1.2479253885005738e-05</frequency></value> +<value><degree>1824</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1828</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1678</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>101</degree><frequency>5.6208209647339895e-06</frequency></value> +<value><degree>277</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>2102</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>386</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>330</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>1847</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1404</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>102</degree><frequency>4.22850751475401e-06</frequency></value> +<value><degree>51</degree><frequency>2.0420597266373025e-05</frequency></value> +<value><degree>858</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>98</degree><frequency>7.1678359091561886e-06</frequency></value> +<value><degree>329</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>494</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>13753</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1008</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>843</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>89</degree><frequency>7.580373227668775e-06</frequency></value> +<value><degree>7204</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>96</degree><frequency>6.291194107316943e-06</frequency></value> +<value><degree>273</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>279</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>131</degree><frequency>3.7644030314273506e-06</frequency></value> 
+<value><degree>54</degree><frequency>1.7790671860855288e-05</frequency></value> +<value><degree>463</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>481</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>527</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>121</degree><frequency>3.815970196241424e-06</frequency></value> +<value><degree>213</degree><frequency>1.186044790723686e-06</frequency></value> +<value><degree>199</degree><frequency>1.392313449979979e-06</frequency></value> +<value><degree>152</degree><frequency>2.5267910758895917e-06</frequency></value> +<value><degree>1178</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>110</degree><frequency>5.775522459176209e-06</frequency></value> +<value><degree>189</degree><frequency>1.6501492740503456e-06</frequency></value> +<value><degree>193</degree><frequency>1.4438806147940523e-06</frequency></value> +<value><degree>208</degree><frequency>1.4954477796081257e-06</frequency></value> +<value><degree>299</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>172</degree><frequency>2.578358240703665e-06</frequency></value> +<value><degree>6188</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>201</degree><frequency>1.0829104610955392e-06</frequency></value> +<value><degree>149</degree><frequency>2.062686592562932e-06</frequency></value> +<value><degree>156</degree><frequency>2.217388087005152e-06</frequency></value> +<value><degree>206</degree><frequency>1.5985821092362723e-06</frequency></value> +<value><degree>289</degree><frequency>8.250746370251728e-07</frequency></value> +<value><degree>271</degree><frequency>8.250746370251728e-07</frequency></value> +<value><degree>723</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>216</degree><frequency>1.237611955537759e-06</frequency></value> +<value><degree>987</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>428</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>730</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1004</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>917</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>808</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>239</degree><frequency>1.1344776259096125e-06</frequency></value> +<value><degree>389</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>266</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>272</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>462</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>369</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>543</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>281</degree><frequency>8.250746370251728e-07</frequency></value> +<value><degree>186</degree><frequency>1.8564179333066387e-06</frequency></value> +<value><degree>276</degree><frequency>1.4438806147940523e-06</frequency></value> +<value><degree>328</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>118</degree><frequency>4.589477668452524e-06</frequency></value> +<value><degree>326</degree><frequency>8.766418018392461e-07</frequency></value> 
+<value><degree>611</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>786</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1160</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1823</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>580</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>586</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1821</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3145</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6389</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>430</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>252</degree><frequency>7.735074722110994e-07</frequency></value> +<value><degree>1114</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4201</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>669</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>195</degree><frequency>1.5985821092362723e-06</frequency></value> +<value><degree>9136</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>150</degree><frequency>3.1455970536584713e-06</frequency></value> +<value><degree>7202</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>212</degree><frequency>1.031343296281466e-06</frequency></value> +<value><degree>142</degree><frequency>2.1658209221910783e-06</frequency></value> +<value><degree>2815</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>957</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>140</degree><frequency>2.423656746261445e-06</frequency></value> +<value><degree>536</degree><frequency>7.219403073970262e-07</frequency></value> +<value><degree>852</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2603</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>415</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>1627</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>576</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>304</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>417</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>906</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>55</degree><frequency>1.7481268871970848e-05</frequency></value> +<value><degree>2418</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>881</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>715</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1805</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>616</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1264</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>656</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>3592</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>817</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>590</degree><frequency>1.5470149444221988e-07</frequency></value> 
+<value><degree>159</degree><frequency>2.3205224166332982e-06</frequency></value> +<value><degree>351</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>176</degree><frequency>2.629925405517738e-06</frequency></value> +<value><degree>5914</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>405</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2072</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>496</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>853</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1537</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1101</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1696</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>200</degree><frequency>1.8564179333066387e-06</frequency></value> +<value><degree>5708</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>528</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>233</degree><frequency>1.2891791203518324e-06</frequency></value> +<value><degree>2380</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>358</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>945</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1441</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>255</degree><frequency>1.3407462851659058e-06</frequency></value> +<value><degree>323</degree><frequency>6.703731425829529e-07</frequency></value> +<value><degree>1309</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8133</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3717</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>482</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>664</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>3474</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1399</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2016</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1181</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>832</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>406</degree><frequency>4.641044833266597e-07</frequency></value> +<value><degree>1951</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1334</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2305</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>440</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>268</degree><frequency>1.031343296281466e-06</frequency></value> +<value><degree>2283</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>241</degree><frequency>9.282089666533194e-07</frequency></value> +<value><degree>1714</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3694</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>531</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>770</degree><frequency>5.15671648140733e-08</frequency></value> 
+[… degree-distribution data: several thousand <value><degree>…</degree><frequency>…</frequency></value> entries, each pairing a node degree with its relative frequency, omitted here for brevity …]
+<value><degree>350</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>302</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>665</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>6696</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>788</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>705</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>996</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>454</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>960</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>614</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>751</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2430</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>728</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>11581</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5224</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>502</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>673</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>548</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>877</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1759</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1749</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5756</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4559</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2342</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>685</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>824</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>565</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>5079</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>594</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>237</degree><frequency>8.250746370251728e-07</frequency></value> +<value><degree>545</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>660</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>435</degree><frequency>5.15671648140733e-07</frequency></value> +<value><degree>6350</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>985</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>412</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>472</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>244</degree><frequency>4.125373185125864e-07</frequency></value> +<value><degree>533</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>657</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>411</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>828</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1442</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>651</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2097</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>833</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4803</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1520</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1257</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>867</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>4123</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1799</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>798</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2895</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>696</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>787</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>3742</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>708</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1195</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>700</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>642</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>636</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1225</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>988</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1290</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>977</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>3066</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1182</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>378</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1540</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1319</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1023</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>621</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>615</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>671</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1657</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>483</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>571</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3011</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3030</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1331</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>4395</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2119</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2553</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2652</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1357</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>507</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>2569</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1348</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>537</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3249</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>907</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>14516</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2437</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>4496</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>9954</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1378</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1281</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2939</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1099</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2208</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>593</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>400</degree><frequency>5.672388129548063e-07</frequency></value> +<value><degree>371</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>1250</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>555</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3862</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>807</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>567</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>3959</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6058</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1475</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3711</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>19497</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>675</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>622</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>7072</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2129</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2503</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3202</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>375</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>609</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>3231</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>359</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>6728</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>466</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>1992</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>17995</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5968</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>6489</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>928</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3589</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1571</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1560</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2505</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1346</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>22460</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1307</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1802</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1268</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>489</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1497</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>33759</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>17439</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1884</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2987</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4844</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1531</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3110</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1840</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2575</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>16914</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1919</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1308</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>864</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>781</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2036</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>418</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>325</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>4823</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2164</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1024</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5109</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1472</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>899</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1083</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>44962</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>886</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>868</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3611</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2308</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>457</degree><frequency>1.031343296281466e-07</frequency></value> 
+<value><degree>1239</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>48493</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>895</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>2261</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1026</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>827</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>2267</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>17759</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3087</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>814</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3768</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4119</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>20801</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1613</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3517</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1351</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>439</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>7312</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>459</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>280</degree><frequency>6.188059777688795e-07</frequency></value> +<value><degree>1706</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>10511</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1146</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>12775</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2255</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1085</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1751</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>794</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>2755</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>12653</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2338</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1758</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>647</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4400</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3848</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>716</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>4240</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6094</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3736</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1326</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>9790</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5611</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5654</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>6778</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>661</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1458</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2145</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>77625</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1037</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2228</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6597</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1478</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1183</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>745</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>2079</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2473</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>5286</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>14816</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1569</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>11952</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1528</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1615</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4281</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3756</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1377</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>4957</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1796</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>898</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>3930</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>682</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>757</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>818</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1449</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>7181</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1193</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>2341</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3196</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2818</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2450</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2701</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2203</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1817</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3640</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>870</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2149</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2527</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>2917</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1407</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3045</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1618</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5138</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3429</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5807</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>524</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>885</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>554</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>2003</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>588</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>3827</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1400</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>455</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>24934</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1022</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4054</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1630</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5712</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2013</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1850</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1603</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>937</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2416</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>11583</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3003</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3900</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1986</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2366</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>801</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1325</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>408</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>813</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1490</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2100</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>7231</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>623</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>802</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>710</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>633</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1436</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>56188</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>753</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1368</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>57750</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2937</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>6546</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3490</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>530</degree><frequency>3.609701536985131e-07</frequency></value> +<value><degree>1041</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1949</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2649</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2814</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2552</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2977</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8415</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>790</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3832</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1134</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>702</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1285</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2215</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>7435</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2034</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2420</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5123</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1686</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3948</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2933</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6951</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>739</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3808</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1172</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3885</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2098</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3809</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2624</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>390</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>3678</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>35767</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1723</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1382</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1461</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4571</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>13489</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>1314</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4811</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1875</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>478</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1538</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>849</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>880</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1140</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1120</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>6299</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5279</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1406</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1313</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>806</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3180</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2859</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>772</degree><frequency>2.062686592562932e-07</frequency></value> +<value><degree>1965</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>865</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1541</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1792</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1275</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1015</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2258</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>575</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1632</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>986</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3351</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>12142</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3881</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2740</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>25328</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2691</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5707</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>9862</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>35732</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3955</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2156</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>6190</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3439</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>7670</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5133</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2274</degree><frequency>1.031343296281466e-07</frequency></value> 
+<value><degree>3651</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3277</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8262</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>13676</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2178</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3929</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1945</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>674</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1640</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3650</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1545</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>936</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>10354</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>5308</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1814</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>19144</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1439</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>2360</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>132315</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8406</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>925</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1623</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2544</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1622</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>10841</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1190</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>12797</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>32812</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1373</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>783</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1788</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2257</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>5048</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>699</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>10197</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3282</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>334</degree><frequency>2.578358240703665e-07</frequency></value> +<value><degree>1745</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3991</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4579</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3798</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1977</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3088</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>3544</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1194</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>24987</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1218</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>42644</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1446</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5874</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3519</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2650</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1164</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1923</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>668</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2039</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>18632</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1649</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1580</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1529</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2643</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1857</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1604</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4118</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>35735</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4480</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1403</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>686</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1474</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>3807</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3324</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>620</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1426</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>861</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>983</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>13359</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1679</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1324</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2116</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1031</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>3185</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1556</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1323</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>5890</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1995</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1644</degree><frequency>5.15671648140733e-08</frequency></value> 
+<value><degree>584</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>1756</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>916</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>17380</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1251</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8104</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>18681</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>11170</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5028</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2256</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4758</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1141</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1648</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1757</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>12794</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2225</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>4985</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>964</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1429</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8284</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>7406</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1432</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2483</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1737</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1081</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>7726</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>5023</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2284</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>12170</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1033</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1145</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2046</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>835</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2175</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2293</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>426</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>1137</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>407</degree><frequency>3.0940298888443977e-07</frequency></value> +<value><degree>1305</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2835</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3692</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>8293</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>871</degree><frequency>1.031343296281466e-07</frequency></value> 
+<value><degree>1312</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1002</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>735</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>776</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>568</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3214</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>659</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1216</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>5267</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2678</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>655</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>873</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1685</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1581</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>890</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>3495</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1760</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1025</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1451</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1121</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2545</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>952</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1673</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2254</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1928</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>915</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>4859</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1521</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>800</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1123</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1244</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>971</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>649</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1089</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2395</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>2791</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2486</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2089</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>2706</degree><frequency>1.031343296281466e-07</frequency></value> +<value><degree>1046</degree><frequency>1.5470149444221988e-07</frequency></value> +<value><degree>5418</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1397</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1827</degree><frequency>5.15671648140733e-08</frequency></value> 
+<!-- remaining in-degree frequency values of the T-REx degree-distribution data elided -->
+<value><degree>3170</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1012</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>3047</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1876</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>1049</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>862</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>670</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>889</degree><frequency>5.15671648140733e-08</frequency></value> +<value><degree>684</degree><frequency>5.15671648140733e-08</frequency></value> +</indegrees> +</degrees> diff --git a/mainmatter/graph/Weisfeiler-Leman.tex b/mainmatter/graph/Weisfeiler-Leman.tex @@ -0,0 +1,23 @@ +\begin{algorithmic} + \Function{Weisfeiler--Leman}{} + \FunctionInputs{} \(G=(V, E)\) graph + \FunctionInputs*{} \(k\) dimensionality + \FunctionOutput{} \(\chi_\infty\) coloring of \(k\)-tuples + \State + \LComment{Initialization} + \State \(\ell\gets 0\) + \ForAll{\(\vctr{x}\in V^k\)} + \State \(\chi_0(\vctr{x}) \gets \operatorname{iso}(\vctr{x})\) + \EndFor + \LComment{Main Loop} + \Repeat + \State \(\ell\gets \ell+1\) + \State \(\symfrak{I}_\ell\gets \text{new color index}\) + \ForAll{\(\vctr{x}\in V^k\)} + \State \(c_\ell(\vctr{x}) \gets \lMultiBrace\,\chi_{\ell-1}(\vctr{y}) \middlerel{|} \vctr{y}\in\gfneighbors^k(\vctr{x})\,\rMultiBrace\) + \State \(\chi_\ell(\vctr{x}) \gets \text{index of }(\chi_{\ell-1}(\vctr{x}), c_\ell(\vctr{x})) \text{ in } \symfrak{I}_\ell\) + \EndFor + \Until{\(\chi_\ell = \chi_{\ell-1}\)} + \State \Output \(\chi_\ell\) + \EndFunction +\end{algorithmic} diff --git a/mainmatter/graph/analysis.tex b/mainmatter/graph/analysis.tex @@ -0,0 +1,216 @@ +\section{Preliminary Analysis and Proof of Principle} +\label{sec:graph:analysis} +In this section, we want to ensure the soundness of graph-based approaches by providing some statistics about a large relation extraction dataset. +In particular, we start by building an attributed multigraph as described in Section~\ref{sec:graph:encoding}. +We focus on \textsc{t-re}x (Section~\ref{sec:datasets:trex}, \cite{trex}), an alignment (Section~\ref{sec:relation extraction:distant supervision}) of Wikipedia with Wikidata. +This dataset has the advantage of being both large and publicly available. +Note that the graph we analyze in this section is not a knowledge base. +Each arc is both labeled with a relation and attributed with a sentence. +The fact that several arcs are incident to a vertex does not necessarily imply that the corresponding entity is linked by several relations, only that it was mentioned multiple times. + +\begin{marginfigure}[-97mm] + \centering + \renderDegrees{mainmatter/graph/T-REx degrees.xml} + \vspace{-5mm} + \scaption[\textsc{t-re}x vertices degree distribution.]{ + \textsc{t-re}x vertices degree distribution. + The lines give the frequency of vertices with the given in- and out-degree in the dataset. + Note that both axes are log-scaled. + This plot was cut at a degree of \maxdisplayeddegree, which corresponds to a minimum frequency of \(10^{-5}\) out of a total of \numberarcs{} arcs. + In reality, the vertex with the maximum degree is ``United States of America'' \wdent{30} with an \maxdegreetype-degree of \maxdegree. 
+ The asymmetry between the distribution of in-degrees and out-degrees can be explained by the fact that knowledge bases prefer to encode many-to-one relations instead of their one-to-many converse. + \label{fig:graph:degree distribution} + } +\end{marginfigure} + +Figure~\ref{fig:graph:degree distribution} shows the distribution of vertices' degrees in the graph associated with \textsc{t-re}x. +The first thing we can notice about this graph is that it is \emph{scale-free}. +This means that a random vertex \(v\in\entitySet\) has degree \(\gfdegree(v)=k\) with probability \(P(k) \propto k^{-\gamma}\) for a parameter \(\gamma\) which depends on the graph. +In other words, the distribution of degrees follows a power law. +In a scale-free graph, a lot of vertices have few neighbors. +In contrast, the distribution of degrees in a random Erdős--Rényi graph% +\sidenote[][-22mm]{ + There are several different ways to sample random graphs; the Erdős--Rényi model is one of them. + In this model, arcs are incrementally added between two uniformly chosen vertices. + In contrast, if vertices with already high degrees are selected more often (the Barabási--Albert model), the resulting graph is scale-free. +} +is expected to follow a binomial distribution. +Scale-free graphs occur in a number of contexts, such as social networks and graphs of linked web pages. +Most unsupervised relation extraction datasets and knowledge bases should be expected to be scale-free. +This needs to be kept in mind when designing graph-processing algorithms for relation extraction. +Indeed most vertices have a small neighborhood, so we might be tempted to take neighbors of neighbors carelessly. +However, scale-free graphs have a very small diameter% +\sidenote{ + The diameter of a graph is the length of the longest shortest-path: + \begin{equation*} + D = \max_{u,v\in \entitySet} \delta(u, v), + \end{equation*} + where \(\delta(u, v)\) is the length of the shortest path from \(u\) to \(v\). +} +\(D\in O(\log\log n)\). +This means that we can quickly reach most vertices following a small number of arcs. +This is in part due to the fact that some vertices have very high degree, for example in \textsc{t-re}x, the vertex ``United States of America'' \wdent{30} is highly connected with \(\gfdegree(\wdent{30})=1\,697\,334\). +In particular, this implies that by considering neighbors of neighbors, we quickly need to consider the whole graph; this is particularly problematic for graph convolutional networks described in Section~\ref{sec:graph:related work}. + +We now come to the main incentive for taking a graph-based approach to the unsupervised relation extraction task: +\begin{spacedblock} + \strong{Hypothesis:} + \emph{In the relation extraction problem, we can get additional information from the neighborhood of a sample.} +\end{spacedblock} +To test this hypothesis, we compute statistics on the distribution of neighbors. +However, as we just saw, the support of this distribution is of high dimension. +Hence, we look at the statistics of paths in our multigraph.% +\sidenote{ + Paths of length \(k\) are in a domain of size \(|\relationSet|^k\), whereas neighbors are in a domain of size \(|\relationSet|^{\upDelta(G)}\) with \(\upDelta(G)\) designating the maximum degree in \(G\). + By studying paths of length 3, we are effectively studying a subsampled neighborhood of the central arc. 
+} +As a graph theory reminder, we can formally define a path as follows: +\begin{itemize}[nosep] + \item A \emph{walk} of length \(n\) is a sequence of arcs \(a_1, a_2, \ldots, a_n \in \arcSet\) such that \(\gftarget(a_{i-1}) = \gfsource(a_i)\) for all \(i=2, \dotsc, n\). + \item A \emph{trail} is a walk with \(a_i \neq a_j\) for all \(1\leq i < j \leq n\) (arcs do not repeat). + In practice, this means that the samples \((s, \vctr{e})\) do not repeat. + It is not a statement about relations conveyed by these arcs; it is entirely possible that for some \(i\), \(j\) we have \(\gfrelation(a_i)=\gfrelation(a_j)\). + \item A \emph{path} is a trail with \(\gfsource(a_i) \neq \gfsource(a_j)\) for all \(1\leq i < j \leq n\) (vertices do not repeat). +\end{itemize} +It is also possible to base these definitions on \emph{open walks}, which are walks where \(\gfsource(a_1) \neq \gftarget(a_n)\) (the walk does not end where it started). +We base the discussion of this section around the following random path: +\begin{center} + \input{mainmatter/graph/3-path.tex} +\end{center} +Using these definitions, we can restate our hypothesis. +\begin{marginparagraph} + The symbol \(\notindependent\) is used to mean ``not independent'': + \begin{equation*} + \rndm{a}\notindependent\rndm{b} \iff P(\rndm{a}, \rndm{b})\neq P(\rndm{a})P(\rndm{b}) + \end{equation*} +\end{marginparagraph} +In this path, we expect \(\rndm{r}_2\notindependent\rndm{r}_1\) and \(\rndm{r}_2\notindependent\rndm{r}_3\). +However, enumerating all possible paths in a graph with \(n=2\,819\,966\) vertices and \(m=19\,392\,185\) arcs is not practical. + +To approximate path statistics, we turn to sampling. +However, uniformly sampling paths is not straightforward. +As a first intuition, to uniformly sample a path of length 1---that is, an arc---we can use the following procedure: +\begin{marginparagraph} + \(\operatorname{Cat}(\entitySet, f)\) refers to the Categorical distribution over the set \(\entitySet\) where the probability of picking \(e\in \entitySet\) is \(f(e)\). + The \(2m\) appears from the normalization factor \(\sum_{e\in \entitySet} \gfdegree(e)=2m\). +\end{marginparagraph} +\begin{enumerate}[nosep] + \item Sample an entity \(e_1\) weighted by its degree,\\ + \(e_1\sim \operatorname{Cat}\left(\entitySet, e\mapsto \gfdegree(e) \divslash 2m\right)\) + \item Uniformly sample an arc incident to the entity \(e_1\).\\\(a\sim \uniformDistribution(\gfincidents(e_1))\) +\end{enumerate} +The first vertex we select must be weighted by how many paths start there, and since paths of length 1 are arcs, we weight each vertex by its degree.% +\sidenote[][-11mm]{ + To give an intuition, we can also think of what would happen if we chose both the entity and the incident arc uniformly. + An arc that links two entities otherwise unrelated to any other entities is likely to be sampled since sampling either of its two endpoints as \(e_1\) would guarantee we select this arc. + On% XXX manual page break +} +If we want to sample paths of length 2, the first vertex must be selected according to the number of paths of length 2 starting there. +Then the second vertex is selected among the neighbors of the first, weighted by the number of paths of length 1 starting there, etc. + +\leavevmode +\begin{marginparagraph}[-115mm]% XXX manual page break + the other hand, an arc both of whose endpoints have high degrees has little chance of being sampled since, even if one of its endpoints is selected as \(e_1\) in the first step, the arc is unlikely to be selected in the second step.
+\end{marginparagraph} +Sadly enough, counting paths is \#P-complete% +\sidenote{A functional complexity class at least as hard as NP-complete.} +\parencite{path_counting_sharp_p} so we must rely on the regularity of our graph and turn to approximate algorithms. +We propose to use the number of walks as an approximation of the number of paths.% +\sidenotemark%No room here, moved to next page. +A classical result on simple graphs \(G=(V, E)\) is that the powers of the adjacency matrix \(\mtrx{M}\) count the number of walks between pairs of vertices. +For any two vertices \(u, v\in V\), the value \(m^k_{uv}\)---to be interpreted as \((\mtrx{M}^k)_{uv}\)---is the number of walks of length \(k\) from \(u\) to \(v\). +In the case of our multigraph, if we wish to count walks, the adjacency matrix should contain the number of arcs---that is, the number of walks of length 1---between vertices. + +\sidenotetext{ + Other approximations of path counting exist \parencite{path_counting_estimation}, but the approach we propose is particularly suited to our multigraph. + In particular, the shape parameter \(\gamma\) of our degree distribution is relatively small, which produces a large number of outliers. + Our importance-sampling-based approach allows us to reduce the variance of the frequency estimations. +} +\begin{algorithm} + \centering + \begin{minipage}[b]{9cm} + \input{mainmatter/graph/path counting.tex} + \end{minipage} + \scaption[Path counting algorithm]{ + Path counting algorithm. + The higher the number of iterations of the main loop, the more precise the results will be. + In our experiments, we used one billion iterations. + The inner for loop builds the walk \(\vctr{a}\). + If it is a correct path, the relation type of the path is added to the counter with importance weight \(w\). + For numerical stability, we actually compute \(w\) in log-space. + The initial factor \(n=|\entitySet|\) in \(w\) comes from the preceding uniform sampling of \(v\) from \(\entitySet\), which is part of the computation of \(\symcal{F}^k\). + \label{alg:graph:path counting} + } + \vspace{-5mm} +\end{algorithm} + +We could then build a Monte Carlo estimate by following the naive procedure above of sampling vertices one by one according to the number of walks starting with them. +Let's call \(\symcal{W}^k\) this distribution over walks of length \(k\). +Sampling from \(\symcal{W}^k\) is particularly slow since it involves sampling from a categorical distribution over thousands of elements. +Since we only want to evaluate a (counting) function over an expectation \(\expectation_{\vctr{a}\sim\symcal{W}^k}\), we can instead perform importance sampling. +We use the substitute distribution \(\symcal{F}^k\) that uniformly selects a random neighbor at each step. +To make this trick work, we only need to compute the importance weights \(\frac{\symcal{W}^k(\vctr{a})}{\symcal{F}^k(\vctr{a})}\) for all walks \(\vctr{a}\in\arcSet^k\). +Since \(\symcal{W}^k\) is the uniform distribution over all walks, it is constant \(\symcal{W}^k(\vctr{a}) = (\symbf{1}\transpose\mtrx{M}^k\symbf{1})^{-1}\). +On the other hand \(\symcal{F}^k(\vctr{a})\) can be trivially computed as the product of inverse degrees of \(a_i\). +The resulting counting procedure is listed as Algorithm~\ref{alg:graph:path counting}. +We still need to reject non-paths at the end of the main loop. +Note that this algorithm is not exact since the importance weights \(w\) are computed from the number of walks, not paths. 
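To make the procedure concrete, here is a minimal Python sketch of the importance-sampled walk counting described above. It is a sketch under assumptions, not the code used for the thesis: the input format (arcs given as (source, target, relation) triples), the handling of converse relations by traversing arcs in both directions, and all names are illustrative, and the log-space computation of the weights as well as the exact 3-path pattern of the analysis are omitted for brevity.

    import random
    from collections import Counter, defaultdict

    def count_paths(arcs, k=3, iterations=1_000_000, seed=0):
        """Approximate relation-type frequencies of k-paths by importance sampling of walks."""
        rng = random.Random(seed)
        # Incidence lists; each arc is traversable in both directions,
        # the reversed direction standing for the converse relation.
        incident = defaultdict(list)
        for source, target, relation in arcs:
            incident[source].append((target, relation))
            incident[target].append((source, ("converse", relation)))
        entities = list(incident)
        n = len(entities)

        counter = Counter()  # importance-weighted counts per relation-type tuple
        for _ in range(iterations):
            # Proposal: uniform start vertex, then a uniform incident arc at each step.
            vertex = rng.choice(entities)
            weight = n                            # from the uniform choice of the start vertex
            relations, visited = [], [vertex]
            for _ in range(k):
                weight *= len(incident[vertex])   # inverse of the uniform arc probability
                vertex, relation = rng.choice(incident[vertex])
                relations.append(relation)
                visited.append(vertex)
            if len(set(visited)) == len(visited):  # keep only walks that are paths
                counter[tuple(relations)] += weight
        total = sum(counter.values())
        return {path: weight / total for path, weight in counter.items()}

Normalising the accumulated importance weights gives approximate path-type frequencies, which, under this sketch, is how figures such as the per-mille frequencies of the table below would be obtained.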
+ +\begin{table}[t] + \centering + \input{mainmatter/graph/paths frequencies.tex}% + \scaption*[Frequencies of some paths of length 3 in \textsc{t-re}x.]{ + Frequencies of some paths of length 3 in \textsc{t-re}x. + The first column gives the approximate per mille frequency of paths with the given type. + It is computed as the importance weight attributed to the path by the counter \(C\) in Algorithm~\ref{alg:graph:path counting} divided by the sum of all importance weights in \(C\). + We use \({}_\textsc{st}\) as an abbreviation of ``sport team.'' + The path in the first row is the most frequent one in the dataset; other paths were selected for illustrative purposes. + The last path was sampled a single time with an importance weight of 0.89. + \label{tab:graph:paths frequencies} + } +\end{table} + +Using this algorithm on one billion samples from \textsc{t-re}x, we find that the most common paths of length three are related to geopolitical relations,% +\sidenote{This is not surprising, as most general knowledge datasets are dominated by geopolitical entities and relations.} +see Table~\ref{tab:graph:paths frequencies}. +Let us now turn to statistics that could help relation extraction models. +To showcase the dependency between a sample's relation \(\rndm{r}_2\) and its neighbors \(\rndm{r}_1\) and \(\rndm{r}_3\), we investigate the distribution \(P(\rndm{r}_2\mid\rndm{r}_1, \rndm{r}_3)\). +In other words, given a sample, we want to see how its relation is influenced by the relations of two neighboring samples. + +The first value we can look at is the entropy% +\sidenote{ + This is not a conditional entropy. + The context relations \(r_1\), \(r_3\) are fixed; they correspond to elementary events, not random variables (as shown by the fact that they are italicized, not upshape). +} +\(\entropy(\rndm{r}_2\mid r_1, r_3)\). +For example, in the case of \(r_1=\textsl{sport}\) and \(r_3=\widebreve{\textsl{member of}_\textsc{st}}\), all observed values of \(\rndm{r}_2\) are given in Table~\ref{tab:graph:paths frequencies}. +All of them were \(\widebreve{\textsl{sport}}\), with the exception of a single path, which means that \(\entropy(\rndm{r}_2\mid r_1, r_3)\approx 0\). +In other words, if we are given a sample \((s, \vctr{e})\in\dataSet\) and we suspect another sentence containing \(e_1\) to convey \textsl{sport} and another containing \(e_2\) to convey \(\widebreve{\textsl{member of}_\textsc{st}}\), we can be almost certain that the sample \((s, \vctr{e})\) conveys \(\widebreve{\textsl{sport}}\). + +\begin{marginparagraph} + As a reference for the remainder of this section, the distribution of relations in \textsc{t-re}x has an entropy of \(\entropy(\rndm{r})\approx 6.26\ \text{bits}\). + This is for a domain of \(|\relationSet|=1\,316\) relations. +\end{marginparagraph} +To measure this type of dependency at the level of the dataset, we can look at the following value: +\begin{equation*} + \kl\left(P(\rndm{r}_2\mid r_1, r_3) \middlerel{\|} P(\rndm{r}_2)\right) +\end{equation*} +\begin{marginparagraph} + To give a first intuition of what this value represents, we take once again the trivial example of \(r_1=\textsl{sport}\) and \(r_3=\widebreve{\textsl{member of}_\textsc{st}}\). + In this case, \(\kl\left(P(\rndm{r}_2\mid r_1, r_3) \middlerel{\|} P(\rndm{r}_2)\right) \approx 5.47\ \text{bits}\).
+ This is due to the fact that encoding \(\rndm{r}_2\) given its neighbors necessitates close to 0~bits (as shown in Table~\ref{tab:graph:paths frequencies}, \(\rndm{r}_2\) almost always takes the value \(\widebreve{\textsl{sport}}\)) but encoding \(\widebreve{\textsl{sport}}\) among all possible relations in \(\relationSet\) necessitates 5.47~bits (which is a bit less than for most relations since \(\widebreve{\textsl{sport}}\) commonly appears in \textsc{t-re}x). +\end{marginparagraph} +The Kullback--Leibler divergence is also called the \emph{relative entropy}. +Indeed, \(\kl(P\mathrel{\|}Q)\) can be interpreted as the additional quantity of information needed to encode \(P\) using the (suboptimal) entropy encoding given by \(Q\). +If this value is 0, it means that no additional information was provided by \(r_1\) and \(r_3\). +When marginalizing over all possible contexts \(r_1\), \(r_3\), we obtain the mutual information between the relation of a sample \(r_2\) and the relations of two of its neighbors. +On \textsc{t-re}x, we observe: +\begin{equation*} + \operatorname{I}(\rndm{r}_2; \rndm{r}_1, \rndm{r}_3) \approx 6.95\ \text{bits} +\end{equation*} +In other words, we can gain 6.95 bits of information simply by modeling two neighbors (one per entity). +These 6.95 bits can be interpreted as the number of bits needed to perfectly encode \(\rndm{r}_2\) given \(\rndm{r}_1\), \(\rndm{r}_3\) (the conditional entropy \(\entropy(\rndm{r}_2\mid\rndm{r}_1,\rndm{r}_3)\approx 1.06\ \text{bits}\)) subtracted from the number of bits needed to encode \(\rndm{r}_2\) without looking at its neighbors (the cross-entropy \(\expectation_{r_1, r_3}[\entropy_{P(\rndm{r}_2)}(\rndm{r}_2\mid r_1, r_3)]\approx 8.01\ \text{bits}\)).% +\sidenote{ + We denote the cross-entropy by \(\entropy_Q(P) = -\expectation_P[\log Q]\). +} +In other words, most of the uncertainty about the relation of a sample can be removed by looking at the relations of two of its neighbors. diff --git a/mainmatter/graph/approach.tex b/mainmatter/graph/approach.tex @@ -0,0 +1,242 @@ +\section{Proposed Approaches} +\label{sec:graph:approach} +We now turn to the graph-based models we propose to leverage information from the structure of the dataset. +Let us quickly summarize the context in which our work is situated. +We have access to two kinds of features: linguistic---from the sentence---and topological---from the graph. +Unsupervised relation extraction methods do not fully exploit graph neighborhoods.% +\sidenote{As explained in Section~\ref{sec:graph:encoding}, \textsc{mtb} does use close neighborhoods as contrast during training, but not for inference.} +Supervised methods such as \textsc{epgnn} and \textsc{gp-gnn} do, even though the information present in the graph is more important in the unsupervised setting. +Indeed, the relational information is mostly extractable from the sentences and entities alone. +While extra information from topological features can still be used by supervised models, it is not essential. +On the other hand, in the unsupervised setting, the main issue is to identify the relational information in the sentence, to distinguish it from other semantic content. +As we show in Section~\ref{sec:graph:analysis}, this relational information is also present in the topological features (the neighborhood of a sample). +This can be useful in two ways: +\begin{enumerate} + \item Use both pieces of information jointly, linguistic and topological: ``the more features, the better.'' + This is what supervised models do.
+ \item Use the topological features to identify the relational information in the linguistic features. +\end{enumerate} + +In Section~\ref{sec:graph:topological features}, we exploit the first point by adding a \textsc{gcn} to the matching the blanks model (\textsc{mtb}, Section~\ref{sec:relation extraction:mtb}). +In Section~\ref{sec:graph:nonparametric wl}, we show that topological features can be used without training a \textsc{gcn}. +This also serves as an introduction to Section~\ref{sec:graph:refining}, which proposes an unsupervised loss following the second point above; it exploits the fact that relation information is present in both linguistic and topological features. + +\subsection{Using Topological Features} +\label{sec:graph:topological features} +In this section, we seek to use topological information as additional features for an existing unsupervised model: matching the blanks (\textsc{mtb}). +The usefulness of these features lies in the fact that many relations are ``typed'': e.g.~they only accept geographical locations as objects and only people as subjects (such as \emph{born in}). +This can be captured by looking at the neighborhood of each entity, which can be seen as a ``soft'' version of \hypothesis{type} (``relations are typed,'' Section~\refAssumptionSection{type}). + +A straightforward approach is to parallel the construction of \textsc{r-gcn} (Section~\ref{sec:graph:r-gcn}): use a \textsc{gcn}-like encoder followed by a relation classifier---in the case of \textsc{r-gcn}, DistMult. +In effect, this corresponds to taking \textsc{mtb} and augmenting it with a \textsc{gcn} to process neighboring samples. +As a reminder, \textsc{mtb} uses a similarity-based loss where each unsupervised sample \((s, \vctr{e})\in\dataSet\) is represented by \(\bertcoder(s)\). +In this model, the information lies on the arcs. +In order to use a \textsc{gcn} model, we transform our graph \(G=(\entitySet, \arcSet, \gfendpoints, \gfrelation, \gfsentence)\) such that the information lies on the vertices instead. +This transformed graph is called the \emph{line graph} of \(G\) and noted \(L(G)\). +An illustration for simple undirected graphs is provided in Figure~\ref{fig:graph:line graph}. +For a directed (multi)graph, it is defined as follows: +\begin{marginfigure} + \centering + \input{mainmatter/graph/line graph.tex} + \scaption[Example of line graph construction.]{ + Example of line graph construction. + Each edge \(x\,\text{---}\,y\) in the simple undirected graph \(G\) corresponds to the vertex \(xy\) with the same color in the graph \(L(G)\). + Two vertices in \(L(G)\) are connected iff the corresponding edges share an endpoint in \(G\). + In directed graphs, the two arcs further need to be in the same direction in \(G\) for an arc to exist in \(L(G)\). + \label{fig:graph:line graph} + } +\end{marginfigure} +\begin{align*} + L(G) & = (\arcSet, \symfrak{A}, \gfendpoints, \gfsentence) \\ + \symfrak{A} & = \left\{\,(a_1, a_2)\in\arcSet^2 \middlerel{|} \gftarget(a_1) = \gfsource(a_2) \,\right\}. +\end{align*} +In other words, each arc becomes a vertex and an arc \(a_1\to a_2\) is present if and only if \(a_1\) and \(a_2\) form a directed path of length 2. 
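As a concrete illustration of this construction, the following Python sketch builds the line graph from a list of arcs. The (source, target, sentence) input format and the function name are assumptions made for the example, not the implementation used in the thesis.

    from collections import defaultdict

    def line_graph(arcs):
        """Line graph L(G): one vertex per arc of G, and an arc a1 -> a2 whenever
        a1 and a2 form a directed path of length 2 in G."""
        by_source = defaultdict(list)
        for index, (source, _target, _sentence) in enumerate(arcs):
            by_source[source].append(index)
        edges = [(a1, a2)
                 for a1, (_source, target, _sentence) in enumerate(arcs)
                 for a2 in by_source[target]]
        return edges  # vertex i of L(G) carries the sentence of arcs[i]

Each vertex of L(G) would then be labeled with the sentence representation of its arc, as described below.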
+The neighborhood of each sample (arc in the original \(G\)) is still defined as all other samples with at least one entity in common since, by construction, for all \textsc{v}-structures \inlineDoubleArc*[a_1][a_2]{e_1}{e_2}{e_3}, there exists a directed path \inlineDoubleArc[a_1][\breve{a}_2]{e_1}{e_2}{e_3} in the original graph \(G\). +This construction is actually similar to the one of \textsc{epgnn} introduced in Section~\ref{sec:relation extraction:epgnn}. +The main difference is that each vertex in \(L(G)\) corresponds to a sample in \(\dataSet\), while an \textsc{epgnn} graph groups samples by entity pairs into a single vertex. + +The standard loss and training algorithm of \textsc{mtb} as defined by Equation~\ref{eq:relation extraction:mtb loss} can be reused as is; we only need to redefine the similarity function (Equation~\ref{eq:relation extraction:mtb similarity}): +\begin{equation} + \operatorname{sim}(a, a', G) = \sigmoid\left( + \begin{aligned} + \bertcoder(\gfsentence(a))\transpose\bertcoder(\gfsentence(a'))\hspace{2cm} \\ + + \lambda \operatorname{\textsc{gcn}}(L(G))_a\transpose\operatorname{\textsc{gcn}}(L(G))_{a'} + \end{aligned} + \right), + \label{eq:graph:mtb-gcn} +\end{equation} +where \(\lambda\) is a hyperparameter weighting the topology-based prediction over the sentence-based one. +At the input of the \textsc{gcn}, the vertices are labeled using the same sentence encoder: \(\vctr{x}_a = \bertcoder(\gfsentence(a))\). + +The only difference between \textsc{mtb} and the \textsc{mtb}--\textsc{gcn} hybrid we propose is the additional \(\lambda\)-weighted term in Equation~\ref{eq:graph:mtb-gcn}. +We use this model to evaluate whether topological features can be exploited by an existing unsupervised relation extraction loss. +It tells us how much can be gained from the ``adding more features'' aspect of graph-based methods and lets us contrast it with the new topology-aware loss design we propose in Section~\ref{sec:graph:refining}. + +\subsection{Nonparametric Weisfeiler--Leman Iterations} +\label{sec:graph:nonparametric wl} +The losses used to train unsupervised \textsc{gnn}s usually make the hypothesis that linked vertices should have similar representations. +This can be seen in \loss{gs} (Equation~\ref{eq:graph:graphsage loss}), which seeks to maximize the dot product between the representations of adjacent vertices. +While this hypothesis might be helpful for most problems on which \textsc{gnn}s are applied, this is clearly not the case for relation extraction. +In Section~\ref{sec:graph:topological features}, we introduced a first simple solution to this problem: replacing the loss used by the \textsc{gnn} with a standard unsupervised relation extraction loss. +However, it is also possible to design an unsupervised loss from the theoretical foundation of \textsc{gcn}s: the Weisfeiler--Leman isomorphism test. +To this end, we propose to build a model relying on the following hypothesis: +\begin{spacedblock} + \strong{Weak Distributional Hypothesis on Relation Extraction Graph:} + \emph{Two arcs conveying similar relations have similar neighborhoods.} +\end{spacedblock} +Note that we dubbed this version of the distributional hypothesis \emph{weak} since we only state it in one direction, the converse having several counter-examples.
+For example, sentences about the place of birth and the place of death of a person tend to have similar neighborhoods despite conveying different relations.% +\sidenote{ + The neighborhoods are somewhat dissimilar in that ``notable'' people tend to die in places with more population than their birthplace. + However, whether current models can pick this up from other kinds of regularity in a dataset is dubious. +} +To distinguish these kinds of relations with similar neighborhoods, we have to rely on sentence representations.% +\sidenote[][21mm]{ + This can partly explain the conditional entropy \(\entropy(\rndm{r}_2\mid\rndm{r}_1,\rndm{r}_3)\approx 1.06\ \text{bits}\) given in Section~\ref{sec:graph:analysis}. +} + +Following this hypothesis, we first propose a simple parameter-less approach based on the Weisfeiler--Leman isomorphism test (Section~\ref{sec:graph:weisfeiler-leman}). +\emph{We can say that two neighborhoods are similar if they are isomorphic.} +Therefore, we can enforce the hypothesis above by ensuring that if two neighborhoods are assigned similar coloring by the \textsc{wl} algorithm, they convey similar relations. +In the relation extraction problem, contrary to much of the related work presented in Section~\ref{sec:graph:related work}, we have data on the arcs of the graph, not on the vertices. +This means that instead of using the 1-dimensional Weisfeiler--Leman algorithm, we use the 2-dimensional version. +In other words, instead of coloring the vertices, we color the arcs since our problem is to label them with a relation. + +The initial coloring \(\chi_0(a)\) is initialized as the isomorphism class of a sample \(a\in\arcSet\). +We can define this isomorphism class using \(\bertcoder(a)\), which means that the initial representation of a sample will simply be the sentential representation of the sample. +The difficult task is to define the re-indexing of colors as performed by \(\symfrak{I}\) in Algorithm~\ref{alg:graph:weisfeiler-leman}. +This is difficult since the original \textsc{wl} algorithm is defined on a discrete set of colors, while we need to manipulate distributed representations of sentences. + +\begin{marginparagraph} + The astute reader might have noticed that the 2-dimensional \textsc{wl} isomorphism test as described in Algorithm~\ref{alg:graph:weisfeiler-leman} loops over pairs of vertices, not arcs. + This is impractical in our relation extraction graph, which is particularly sparse---the number of arcs \(m\) is far larger than the number of vertices \(n\). + The extra (unlinked) entity pairs considered by Algorithm~\ref{alg:graph:weisfeiler-leman} are usually referred to as \emph{anti-arcs}. + Ignoring anti-arcs leads to the local Weisfeiler--Leman isomorphism tests since only the ``local neighborhood'' is considered. + Other intermediate approaches are possible, sometimes referred to as the \emph{glocalized} variants of Weisfeiler--Leman. + See \textcite{weisfeiler-leman_sparse} for an example of application to graph embeddings. + Alternatively, our proposed approach can be seen as a 1-dimensional Weisfeiler--Leman isomorphism test applied to the line graph. +\end{marginparagraph} + +If we want to produce clear-cut relation classes, we can use a hashing algorithm on sentence representations such as the one proposed for graph kernels by \textcite{graph_continuous_hashing}. 
+However, we focus on a few-shot evaluation in order to compare with \textsc{mtb} and to avoid errors related to knowledge base design as described in Section~\ref{sec:relation extraction:few-shot}. +In this case, we only need to be able to compare the colors of two different samples, measuring how close they are to each other. +Let us define \(\gfeneighbors: \arcSet\to2^\arcSet\) the function mapping an arc to the set of its neighbors. +Formally, for \(a\in\arcSet\), \(\gfeneighbors(a) = \{a'\in\arcSet\mid \gfendpoints(a)\cap\gfendpoints(a')\neq\emptyset\}\). +In other words, \(\gfeneighbors\) in \(G\) corresponds to the neighbors function \(\gfneighbors\) in the line graph \(L(G)\). +Since \(\arcSet\) can be seen as the set of samples, \(\gfeneighbors(a)\) can be seen as the set of samples with at least one entity in common with \(a\). +To enforce the weak distributional hypothesis on graphs stated above, we take two first-order neighborhoods \(\gfeneighbors(a), \gfeneighbors(a')\subseteq\arcSet\) and define a distance between them. +This corresponds to comparing two empirical distributions of sentence representations% +\sidenote{ + We are comparing sentence representations and not directly sentences since the initial coloring \(\chi_0\) has been defined using \bertcoder. +} +that have an entity in common with \(a\) and \(a'\). +This can be done using the 1-Wasserstein distance between the two neighborhoods since they can be seen as two distributions of Dirac deltas in \bertcoder{} representation space.% +\sidenote{ + Wasserstein distance has the advantage of working on distributions with disjoint supports. +} +This needs to be done for the two entities, which correspond to the in-arc-neighbors \(\gfeneighbors_\gfnleft\) and out-arc-neighbors \(\gfeneighbors_\gfnright\). +While this is 1-localized, we can generalize this encoding to be \(K\)-localized by defining the \(k\)-sphere centered on an arc \(a\), where the 1-sphere corresponds to \(\gfeneighbors\): +\begin{align*} + S_\gfnright(a, 0) & = \{\,a\,\} \\ + S_\gfnright(a, k) & = \{\,x\in\arcSet \mid \exists y\in S_\gfnright(a, k-1): \gfsource(x)=\gftarget(y)\,\}. +\end{align*} +This sphere can be embedded using \bertcoder{}, which corresponds to retrieving its initial coloring: +\begin{equation*} + \symfrak{S}_\gfnright(a, k) = \{\,\bertcoder(\gfsentence(x))\in\symbb{R}^d \mid x\in S_\gfnright(a, k)\,\}. +\end{equation*} +We can thereafter define the \(K\)-localized out-neighborhood of \(a\in\arcSet\) as the sequence of \(\symfrak{S}_\gfnright(a, k)\) for all \(k=1,\dotsc,K\). +The in-neighborhood is defined similarly. +Finally, the distance between two samples \(a, a'\in\arcSet\) can be defined as: +\begin{marginparagraph} + To be precise Equation~\ref{eq:graph:topological distance} defines a distance between samples from the Euclidean distances between neighboring samples---that is samples with an entity in common. + The distance \(W_1\) is the cost of the optimal transport plan between two sets of Dirac deltas corresponding to the neighborhoods of the samples. +\end{marginparagraph} +\begin{equation} + d(a, a'; \vctr{\lambda}) = + \sum_{k=0}^K \frac{\lambda_k}{2} + \sum_{o\in\{\gfoleft, \gforight\}} + W_1\left(\symfrak{S}_o(a, k), \symfrak{S}_o(a', k)\right), + \label{eq:graph:topological distance} +\end{equation} +where \(W_1\) designates the 1-Wasserstein distance, and \(\vctr{\lambda}\in\symbb{R}^{K+1}\) weights the contribution of each sphere to the final distance value. 
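To illustrate how this distance could be evaluated in practice, here is a small numpy sketch. It approximates \(W_1\) with entropy-regularised Sinkhorn iterations (an exact optimal transport solver could be substituted), and the dictionary-of-spheres input format as well as all names are assumptions made for the example, not the thesis' code.

    import numpy as np

    def sinkhorn_w1(X, Y, reg=0.1, iterations=200):
        """Entropy-regularised approximation of the 1-Wasserstein distance between
        two uniform clouds of points X (n, d) and Y (m, d)."""
        cost = np.linalg.norm(X[:, None, :] - Y[None, :, :], axis=-1)  # pairwise Euclidean costs
        if cost.max() == 0.0:               # identical singleton neighborhoods
            return 0.0
        eps = reg * cost.max()              # regularisation scaled to the cost magnitude
        K = np.exp(-cost / eps)
        a = np.full(len(X), 1 / len(X))
        b = np.full(len(Y), 1 / len(Y))
        u = np.ones_like(a)
        for _ in range(iterations):
            v = b / (K.T @ u)
            u = a / (K @ v)
        plan = u[:, None] * K * v[None, :]  # approximate optimal transport plan
        return float((plan * cost).sum())

    def neighborhood_distance(spheres_a, spheres_b, lambdas):
        """d(a, a'; lambda): lambda-weighted sum of Wasserstein distances between
        the k-spheres of two samples, over the in- and out-neighborhoods."""
        return sum(
            lam / 2 * sinkhorn_w1(spheres_a[(k, side)], spheres_b[(k, side)])
            for k, lam in enumerate(lambdas)
            for side in ("in", "out")
        )

Here, spheres_a would map (k, side) to the array of sentence embeddings of the corresponding k-sphere of a, the 0-sphere reducing to the embedding of the sample itself.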
+In particular, \(\lambda_0\) parametrizes how much the linguistic features should weigh compared to the topological features.% +\sidenote{ + The 1-Wasserstein distance is defined on top of a metric space; therefore, the difference between two neighbors must be defined using the Euclidean distance. + We can't use the dot product as is usually done with \textsc{bert} representations (see for example Equation~\ref{eq:relation extraction:mtb similarity}). + However, we can slightly change Equation~\ref{eq:graph:topological distance} to use the dot product for the computation of the linguistic similarity (the term \(k=0\)). + In this case, however, \(d\) would no longer satisfy the properties of a metric. +} + +To relate this function back to our original re-coloring problem, the distance \(d\) up to \(K\) can be seen as a distance on \(\chi_K\), the coloring assigned at step \(K\). +Indeed, if \(d(a, a'; \vctr{\lambda}) = 0\), then \(\chi_K(a)=\chi_K(a')\). +However, while two colors are either equal or not in the original algorithm, the distance \(d\) gives a topology to the set of arcs. +We don't directly compute a hard-coloring of 2-tuples. +The closest thing to a coloring \(\chi\) in our algorithm is the sphere embedding \(\symfrak{S}\), which, in fact, is more akin to \(c\) in Algorithm~\ref{alg:graph:weisfeiler-leman}. +In other words, we skip the re-indexing step of the Weisfeiler--Leman algorithm to deal with the continuous nature of sentence embeddings, at the price of a higher computational cost. + +Combining a Wasserstein distance with Weisfeiler--Leman was already proposed for graph kernels \parencite{weisfeiler-leman_wasserstein}. +However, this was applied to a simple graph without attributed edges, and it was unrelated to any information extraction task. +For unsupervised relation extraction, the distance function \(d\) can directly be used to compute the similarity between query and candidate samples in a few-shot problem (Section~\ref{sec:relation extraction:few-shot}). +Since the number of arcs at distance \(k\) grows quickly in a scale-free graph,% +\sidenote{ + Remember that the diameter of the (scale-free) graph is on the order of \(\log\log n\). +} +we either need to keep \(K\) low or employ sampling strategies similar to those of Graph\textsc{sage} (Section~\ref{sec:graph:spatial gcn}). +Furthermore, the Wasserstein distance is hard to compute exactly; entropic regularization of the objective has been proposed to address this. +In particular, \(W_1\) can be efficiently approximated with Sinkhorn iterations \parencite{sinkhorn}. + +\subsection{Refining Linguistic and Topological Features} +\label{sec:graph:refining} +While the nonparametric method presented in the \hyperref[sec:graph:nonparametric wl]{previous} section manages to consider both the linguistic and topological features, it processes them in isolation. +In this section, we propose a scheme that allows the encoders of linguistic and topological features to adapt to each other during training. +Conceptually, this is somewhat similar to Self\textsc{ore} (Section~\ref{sec:relation extraction:selfore}). +As a reminder, Self\textsc{ore} is a clustering method that purifies relation clusters by optimizing \bertcoder{} such that samples with close linguistic forms are pushed closer. +In our scheme, we propose to refine both linguistic and topological features with respect to each other. +\begin{marginparagraph} + As a reminder, \hypothesis{\ctxoneadj} states that two samples with similar contextualized embeddings convey similar relations.
+ See Appendix~\ref{chap:assumptions}. +\end{marginparagraph} +In this way, we hope to enforce both \hypothesis{\ctxoneadj} and the following assumption:% +\begin{assumption}[oneneighborhood]{1-neighborhood} + Two samples with the same neighborhood in the relation extraction graph convey the same relation. + + \smallskip + \noindent + \( \forall a, a'\in\arcSet\colon \gfeneighbors(a) = \gfeneighbors(a') \implies \gfrelation(a)=\gfrelation(a') \) +\end{assumption} +Note that this is the converse of the weak distributional hypothesis on the relation extraction graph stated in Section~\ref{sec:graph:nonparametric wl}. +We need to make the modeling hypothesis in this direction since, in the unsupervised relation extraction problem, we do not have access to relations and therefore can't enforce a hypothesis between samples conveying the same relation. +We posit that by balancing \hypothesis{\ctxoneadj} and \hypothesis{1-neighborhood} we are able to exploit the structure induced by both sources of information in an unsupervised sample \((s, \vctr{e})\in\dataSet\): the sentence \(s\) and the entities \(\vctr{e}\), whereas Self\textsc{ore} only relies on the sentence \(s\). + +To define the topological and linguistic distance between two samples, we use the distance function defined by Equation~\ref{eq:graph:topological distance}. +For computational reasons, we set \(K=1\), which means that our model is 1-localized. +The linguistic distance is simply the distance between the \bertcoder{} representations of the samples' sentences. +In other words, it is \(d(a, a'; [1, 0]\transpose)\). +On the other hand, the topological distance can be defined as the distance between the two neighborhoods, in other words, \(d(a, a'; [0, 1]\transpose)\). +We propose to train \bertcoder{} such that these two distances coincide more closely. +In practice, this can be achieved with a triplet loss similar to the one used by TransE (Section~\ref{sec:context:transe}). +Given three arcs \(\vctr{a}\in\arcSet^3\), we ensure the two distances are similar between the first two arcs \(a_1\) and \(a_2\), and we contrast these distances using the third arc \(a_3\). +This translates to the following loss: +\begin{marginparagraph} + Intuitively, we want to optimize the mean squared error (\textsc{mse}) between the linguistic and topological distances of all pairs of arcs \((d(a_1, a_2; [1, 0]\transpose) - d(a_1, a_2; [0, 1]\transpose))^2\). + However, this loss could be optimized by encoding all arcs into a single point. + The output of \bertcoder{} would then be constant. + Therefore, we need to regularize the \textsc{mse} loss such that distances that shouldn't be close are not. + This is the point of the triplet loss; we contrast the positive distance delta with a negative one. + While \(d(a_1, a_2; [1, 0]\transpose)\) and \(d(a_1, a_2; [0, 1]\transpose)\) should be close to each other (because of \hypothesis{1-neighborhood}), they shouldn't be close to any distance involving a third sample \(a_3\). + This ensures that our model does not collapse.
+\end{marginparagraph} +\begin{equation*} + \loss{lt}(a_1, a_2, a_3) = \max\left( + \begin{aligned} + 0, \zeta & + + 2 \big(d(a_1, a_2; [1, 0]\transpose) - d(a_1, a_2; [0, 1]\transpose)\big)^2 \\ + & \hspace{5mm} - \big(d(a_1, a_2; [1, 0]\transpose) - d(a_1, a_3; [0, 1]\transpose)\big)^2 \\ + & \hspace{5mm} - \big(d(a_1, a_3; [1, 0]\transpose) - d(a_1, a_2; [0, 1]\transpose)\big)^2 + \end{aligned} + \right), +\end{equation*} +where \(\zeta > 0\) is a hyperparameter defining the maximum margin we seek to enforce between the true distance-error and the negative distance-error. +By randomly sampling arc triplets \(\vctr{a}\in\arcSet^3\), we can fine-tune a \bertcoder{} in an unsupervised fashion such that it captures both linguistic and topological features. +During evaluation, the procedure described in Section~\ref{sec:graph:nonparametric wl} can be reused, such that both the linguistic representations refined by the topological structure and the topological representations refined by the linguistic structure are used jointly. +However, both distances could be used independently, for example, if a sample contains unseen entities, or, on the contrary, if we want to assess which relation links two entities without any supporting sentence. diff --git a/mainmatter/graph/biclique.tex b/mainmatter/graph/biclique.tex @@ -0,0 +1,25 @@ +\begin{tikzpicture}[n/.style={circle,fill=black,inner sep=0.7mm}] + \matrix[matrix of nodes, column sep=1cm] at (0, 8mm) { + \node[n] (a1) {}; & + \node[n] (a2) {}; & + \node[n] (a3) {}; & + \node[n] (a4) {}; \\ + }; + \matrix[matrix of nodes, column sep=1cm] at (0, -8mm) { + \node[n] (b1) {}; & + \node[n] (b2) {}; & + \node[n] (b3) {}; \\ + }; + + \foreach \i in {1,...,4}{ + \foreach \j in {1,...,3}{ + \draw[-{Latex[width=1mm]}] (a\i) -- (b\j); + } + } + + \node[draw, Dark2-A, thick, dashed, fit=(a1) (a4), inner sep=2mm] (a) {}; + \node[draw, Dark2-B, thick, dashed, fit=(b1) (b3), inner sep=2mm] (b) {}; + + \node[left=0.2mm of a.west] {\(A\)}; + \node[left=0.2mm of b.west] {\(B\)}; +\end{tikzpicture} diff --git a/mainmatter/graph/chapter.tex b/mainmatter/graph/chapter.tex @@ -0,0 +1,26 @@ +\chapter{Graph-Based Aggregate Modeling} +\label{chap:graph} +\begin{translatedepigraph} + {Henri Poincaré} + {\em Thermodyna\-mique} + {\cite*{thermodynamique}} + {It is the simple hypotheses of which one must be most wary; because these are the ones that have the most chances of passing unnoticed.} + C'est même des hypothèses simples qu'il faut le plus se défier, parce que ce sont celles qui ont le plus de chances de passer inaperçues. +\end{translatedepigraph} +\begin{epigraph} + {Tim Berners-Lee} + {\citetitle{weaving_the_web}} + {\cite*{weaving_the_web}} + In an extreme view, the world can be seen as only connections, nothing else. + We think of a dictionary as the repository of meaning, but it defines words only in terms of other words. + I liked the idea that a piece of information is really defined only by what it's related to, and how it's related. + There really is little else to meaning. + The structure is everything.
+\end{epigraph}
+\input{mainmatter/graph/introduction.tex}
+\input{mainmatter/graph/encoding.tex}
+\input{mainmatter/graph/analysis.tex}
+\input{mainmatter/graph/related work.tex}
+\input{mainmatter/graph/approach.tex}
+\input{mainmatter/graph/experiments.tex}
+\input{mainmatter/graph/conclusion.tex}
diff --git a/mainmatter/graph/chebyshev.tex b/mainmatter/graph/chebyshev.tex
@@ -0,0 +1,17 @@
+\begin{tikzpicture}[chebyshev/.style={domain=-1:1, samples=500}]
+    \begin{axis}[
+        modern,
+        width=50mm,
+        legend entries={\(T_0\), \(T_1\), \(T_2\), \(T_3\), \(T_4\)},
+        legend columns=3,
+        legend style={
+            at={(0.5,-0.3)},
+            anchor=north,
+            draw=none}]
+    \addplot[Dark2-A, solid, chebyshev] {1};
+    \addplot[Dark2-B, dotted, chebyshev] {x};
+    \addplot[Dark2-C, dashed, chebyshev] {2*x^2-1};
+    \addplot[Dark2-D, dashdotted, chebyshev] {4*x^3-3*x};
+    \addplot[Dark2-E, loosely dashdotdotted, chebyshev] {8*x^4-8*x^2+1};
+\end{axis}
+\end{tikzpicture}
diff --git a/mainmatter/graph/conclusion.tex b/mainmatter/graph/conclusion.tex
@@ -0,0 +1,11 @@
+\section{Conclusion}
+\label{sec:graph:conclusion}
+In this chapter, we explore aggregate approaches to unsupervised relation extraction using graphs.
+In Section~\ref{sec:graph:analysis}, we show that a large amount of information can be leveraged from the neighborhood of a sample.
+This, together with the observation that previous unsupervised methods always ignored the neighborhood of a sample at inference, opens a new research direction for unsupervised methods.
+In Section~\ref{sec:graph:approach}, we propose several models that make use of the neighborhood information.
+In particular, we propose a novel unsupervised training loss in Section~\ref{sec:graph:refining}, which makes very few modeling assumptions while still being able to exploit the neighborhood information both at training and prediction time.
+
+Our contributions lie in using a multigraph with arcs attributed with sentences (Section~\ref{sec:graph:encoding}), our method to approximate the quantity of information extractable from this graph (Section~\ref{sec:graph:analysis}), and our proposed approach to utilize this additional information (Section~\ref{sec:graph:approach}).
+Despite encouraging early results showing the soundness of using the relation extraction graph, at the present time, we have only improved nonparametric models.
+More experimentation is still needed to fully exploit topological information.
diff --git a/mainmatter/graph/encoding.tex b/mainmatter/graph/encoding.tex
@@ -0,0 +1,139 @@
+\section{Encoding Relation Extraction as a Graph Problem}
+\label{sec:graph:encoding}
+In this section, we describe how to frame the relation extraction problem as a problem on graphs.
+In particular, we describe the structure of an attributed multigraph, which is a generalization of the simple undirected graph defined in the previous paragraph.
+This structure is needed to model entities linked by multiple relations or sentences since this can't readily be done with a simple graph.
+
+\begin{marginparagraph}
+    The distinction between \(E\) and \(\entitySet\) is important.
+    We decided to keep the usual \(G=(V, E)\) notation for undirected graphs.
+    However, the multigraph we describe in this section has the set of entities \(\entitySet\) as vertices.
+    This set \(\entitySet\) takes the place of \(V\); despite the similar notation, it has nothing to do with \(E\).
+\end{marginparagraph} +Since a knowledge base relation can be formally defined as a set of entity pairs (Section~\ref{sec:context:relation algebra}), we can represent it using a single graph \(G=(V, E)\) where \(V\) is the set of entities (\(V=\entitySet\)) and \(E\) is the set of pairs linked by the relation (\(E\in\relationSet\)). +However, to encode the relation extraction task on a graph, we need different kinds of edges. +We, therefore, use the structure of an attributed% +\sidenote{ + The term ``\emph{labeled}'' is usually reserved for graphs where the domain of attributes is discrete and finite. + However the set of possible sentences \(\sentenceSet\) is not (theoretically) finite. +} +multigraph \(G=(\entitySet, \arcSet, \gfendpoints, \gfrelation, \gfsentence)\) where:% +\sidenote{ + To be perfectly formal, \(G\) should also depend on \(\sentenceSet\) and \(\relationSet\), the co-domains of \(\gfsentence\) and \(\gfrelation\). + We omit them for conciseness. +} +\begin{itemize}[nosep] + \item \(\entitySet\) is the set of entities, which corresponds to the vertices of \(G\) (indeed \(\entitySet = V\)), + \item \(\arcSet\) is the set of arcs, which represent a directed% + \sidenote{ + We use the word \emph{edge} to refer to a symmetric connection \(\{u, v\}\), while \emph{arc} refers to an asymmetric connection \((u, v)\). + Using this nomenclature, an undirected graph has \emph{edges} while a directed graph has \emph{arcs}. + } + link (usually a sentence) between two entities (this approximately corresponds to the set of edges \(E\) in a simple graph, but can also be seen as equivalent to a supervised set of samples \(\dataSet_\relationSet\)), + \item \(\gfsource: \arcSet\to \entitySet\) assigns to each arc its source vertex (the entity \(e_1\)), + \item \(\gftarget: \arcSet\to \entitySet\) assigns to each arc its target vertex (the entity \(e_2\)), + \item \(\gfsentence: \arcSet\to \sentenceSet\) assigns to each arc \(a\in \arcSet\) the corresponding sentence containing \(\gfsource(a)\) and \(\gftarget(a)\), + \item \(\gfrelation: \arcSet\to \relationSet\) assigns to each arc \(a\in\arcSet\) the relation linking the two entities conveyed by \(\gfsentence(a)\). +\end{itemize} + +In this graph, the vertices are entities with an arc linking them for each sentence in which they both appear. +Figure~\ref{fig:graph:samples example} shows the graph corresponding to the sentences in Table~\ref{tab:relation extraction:supervised samples}. +Let's call \(a\in\arcSet\) the highlighted bottom left arc in Figure~\ref{fig:graph:samples example} linking \textsc{smersh} to counterintelligence. +Applying the above definitions to this arc we have: +\begin{itemize}[nosep] + \item \(\gfsource(a) = \textsc{smersh}\) (\wdent{158363}) + \item \(\gftarget(a) = \text{counterintelligence}\) (\wdent{501700}) + \item \(\rlap{\(\gfsentence(a)\)}\hphantom{\gfsource(a)} = \parbox[t]{90mm}{\hbadness=2000\relax% Can't do better :'( + In its \utail{counter-espionage} and counter-intelligence roles, \uhead{\textsc{smersh}} appears to have been extremely successful throughout World War II.}\) + \item \(\rlap{\(\gfrelation(a)\)}\hphantom{\gfsource(a)} = \textsl{field of work}\) (\wdrel{101}) +\end{itemize} +\begin{marginparagraph}[-16mm] + Remember that \(\sentenceSet\) is not simply a set of regular sentences but a set of sentences with two tagged and ordered entities. 
+\end{marginparagraph} +\begin{figure*} + \centering + \input{mainmatter/graph/samples example.tex} + \scaption[Multigraph construction example.]{ + Multigraph \(G\) corresponding to the four samples of Table~\ref{tab:relation extraction:supervised samples}. + For each arc \(a\), its relation \(\gfrelation(a)\) is written over the arc, and the beginning of the conveying sentence \(\gfsentence(a)\) is written under the arc. + For ease of reading, surface forms are given instead of numerical identifiers. + The highlighted arc corresponds to the example given above. + \label{fig:graph:samples example} + } +\end{figure*} + +In the supervised relation extraction task, the set of relations \(\relationSet\) is fully known, and \(\gfrelation\) is partially known; the goal is to complete \(\gfrelation\). +In the unsupervised relation extraction task, \(\relationSet\) is unknown, and \(\gfrelation\) must be built from the ground up. +We can also encode a knowledge base using this structure by removing the associated sentences (i.e.~the \(\gfsentence\) attributes).% +\sidenote[][1cm]{ + Indeed, in this case, the graph is simply a set of entities linked by relation arcs such as \inlineArc[\textsl{capital of}]{\text{Sanaa}}{\text{Yemen}}. +} + +Note that the graph \(G\) is directed because most relations and sentences are asymmetric (inverting the two entities changes the meaning). +This is the only semantic associated with orientation.% +\sidenote[][12mm]{ + For example, while the notion of sink---a vertex with no outgoing arcs---might be of interest to graph theorists, it bears no special meaning in our encoding. +} +In the unsupervised setting, when the graph is not labeled with relations, each arc \inlineArc[s]{u}{v} has a symmetric arc \inlineArc*[\breve{s}]{u}{v} where \(\breve{s}\in\sentenceSet\) is the same sentence as \(s\in\sentenceSet\) with the tags \uhead{\null\kern3mm} and \utail{\null\kern3mm} inverted. + +For ease of notation, let us define the incident function \(\gfincidents\) associating to each vertex its set of incident arcs \(\gfincidents(e) = \left\{ a\in\arcSet \middlerel{|} \gfsource(a)=e \lor \gftarget(a)=e \right\}\). +In other words, \(\gfincidents\) associates to each entity the set of samples in which it appears. +Furthermore, for each relation \(r\in \relationSet\), we define the relation graphs \(G_\gfsr = (\entitySet, \arcSet_\gfsr, \gfsource, \gftarget, \gfrelation, \gfsentence)\) where \(\arcSet_\gfsr = \{ a\in \arcSet \mid \gfrelation(a) = r \}\) is the set of arcs labeled with relation \(r\). +We can then define the out-neighbors \(\gfneighborsrr\) and in-neighbors \(\gfneighborsrl\) functions on the relation graph \(G_\gfsr\) as follows:% +\sidenote{ + Note that the functions we define here are for the open neighborhood. + This means that we don't consider a vertex to be its own neighbor. +} +\begin{align*} + \gfneighborsrr(e_1) & = \left\{\, e_2\in\entitySet \middlerel{|} \exists a\in \arcSet : \gfsource(a)=e_1 \land \gftarget(a)=e_2 \land \gfrelation(a)=r \,\right\}, \\ + \gfneighborsrl(e_1) & = \left\{\, e_2\in\entitySet \middlerel{|} \exists a\in \arcSet : \gftarget(a)=e_1 \land \gfsource(a)=e_2 \land \gfrelation(a)=r \,\right\}. +\end{align*} +Using these definitions we can write expressions for the generic neighbors function: +\begin{align*} + \gfneighbors_\gfsr(e) & = \gfneighborsrr(e) \cup \gfneighborsrl(e), \\ + \gfneighbors(e) & = \bigcup_{r\in\relationSet} \gfneighbors_\gfsr(e). 
+\end{align*}
+Finally, we can define the degree of a vertex as its number of neighbors:
+\begin{equation*}
+    \gfdegree(e) = |\gfneighbors(e)|,
+\end{equation*}
+which can be broken down into in-degree and out-degree using in-neighbors and out-neighbors.
+
+\leavevmode
+\begin{marginparagraph}[-15mm]
+    Since we mention several hypotheses, we take this opportunity to remind the reader that all assumptions are detailed in Appendix~\ref{chap:assumptions}.
+\end{marginparagraph}
+Using these notations, we can reformulate modeling assumptions such as \hypothesis{biclique} (Section~\refAssumptionSection{biclique}), \hypothesis{1-adjacency} (Section~\refAssumptionSection{oneadjacency}) and \hypothesis{\(1\to1\)} (Section~\refAssumptionSection{onetoone}).
+For example, the hypothesis \hypothesis{biclique} draws its name from the fact that for every relation \(r\in\relationSet\), the relation graph \(G_\gfsr\) is assumed to be a biclique.%
+\sidenote[][-16mm]{
+    A biclique is a \emph{complete bipartite graph}.
+    Its vertices can be split into two sets \(A, B\subseteq\entitySet\) such that each vertex in \(A\) is linked to all vertices in \(B\).
+    For example:
+    \begin{center}
+        \input{mainmatter/graph/biclique.tex}
+    \end{center}
+}
+This is of particular interest for studying matching the blanks (\textsc{mtb}, Section~\ref{sec:relation extraction:mtb}).
+It can be analyzed using the following graph:%
+\begin{center}
+    \begin{tikzpicture}
+        \node (e1) {\(e_1\)};
+        \node[right=of e1] (e2) {\(e_2\)};
+        \node[left=of e1] (e3) {\(e_3\)};
+        \draw[arrow] (e1) to node[midway,above] {\(r_3\)} (e3);
+        \draw[arrow] (e1) to[bend left=30] node[midway,above] {\(r_1\)} (e2);
+        \draw[arrow] (e1) to[bend right=30] node[midway,below] {\(r_2\)} (e2);
+    \end{tikzpicture}
+\end{center}
+\textsc{mtb} makes two main assumptions: \hypothesis{1-adjacency} and \hypothesis{\(1\to1\)}.
+In the above graph, \hypothesis{1-adjacency} implies that \(r_1\) and \(r_2\) should be the same, while \hypothesis{\(1\to1\)} implies that \(r_3\) should be different from \(r_1\) and \(r_2\).
+From this simple example, we can also see that \textsc{mtb} training is 1-localized, which means that it only exploits the fact that two samples are direct neighbors.%
+\sidenote[][-16mm]{
+    Here we use \emph{neighbors} as in ``arc-neighbors.''
+    This is a relation between two arcs sharing a common endpoint.
+    Arc-neighbors are simple neighbors in the line graph described in Section~\ref{sec:graph:topological features}.
+}
+In contrast, a sentential approach is 0-localized; it completely ignores other samples.
+This is actually the case for \textsc{mtb} during evaluation.
+The same problem plagues the fill-in-the-blank model of Chapter~\ref{chap:fitb}; while training is influenced by the direct neighbors (through the entity embeddings), when classifying an unknown sample, its neighbors are ignored.
+The goal of this chapter is to consider larger neighborhoods both for training unsupervised models and for making predictions with them.
diff --git a/mainmatter/graph/experiments.tex b/mainmatter/graph/experiments.tex
@@ -0,0 +1,43 @@
+\section{Experiments}
+\label{sec:graph:experiments}
+Matching the blanks was trained on a huge unsupervised dataset that is not publicly available \parencite{mtb}.
+To ensure reproducibility, we instead attempt to train on \textsc{t-re}x (Section~\ref{sec:datasets:trex}, \citex{trex}).
+The evaluation is done in the few-shot setting (Section~\ref{sec:relation extraction:few-shot}) on the FewRel dataset (Section~\ref{sec:datasets:fewrel}) in the 5-way 1-shot setup.
+Our code is available at \url{https://esimon.eu/repos/gbure}.
+
+The \bertcoder{} model we use is the entity markers--entity start variant described in Section~\ref{sec:relation extraction:mtb sentential}, based on a \texttt{bert-base-cased} transformer.
+We use a \bertcoder{} with no post-processing layer for the standalone \textsc{bert} model.
+The \textsc{mtb} model is followed by a layer norm even during pre-training, as described by \textcite{mtb}.
+The \textsc{mtb} similarity function remains a dot product but was rescaled to be normally distributed.
+When augmenting \textsc{mtb} with a \textsc{gcn}, we tried both the Chebyshev approximation described in Section~\ref{sec:graph:spectral gcn} and the mean aggregator of Section~\ref{sec:graph:spatial gcn}; however, at the time of writing, we were only able to train the Chebyshev variant.
+The nonparametric \textsc{wl} algorithm uses a dot product for linguistic similarity and a Euclidean 1-Wasserstein distance for topological distance; the hyperparameters are \(\vctr{\lambda}=[-1, 0.2]\transpose\).
+
+\leavevmode%
+\begin{margintable}[0mm]
+    \centering
+    \input{mainmatter/graph/quantitative.tex}
+    \scaption[Preliminary results for FewRel validation accuracies of graph-based approaches.]{
+        Preliminary results for FewRel validation accuracies of graph-based approaches.
+        To better evaluate the efficiency of topological features, we report results on the subset of the dataset that is connected in \textsc{t-re}x.
+    }
+    \label{tab:graph:quantitative}
+\end{margintable}
+We report our results in Table~\ref{tab:graph:quantitative}.
+The given numbers are accuracies on the subset of FewRel with at least one neighbor in \textsc{t-re}x.
+The accuracies on the whole dataset are 73.74\% for linguistic features alone (\textsc{bert}) and 77.54\% for \textsc{mtb}.
+Our results for \textsc{mtb} are still slightly below what \textcite{mtb} report because of the \textsc{bert} model size mismatch and the smaller pre-training dataset.
+The result gap is within expectations, as already reported by other works that used a similar setup in the supervised setting \parencite{mtb_low}.
+On the other hand, our accuracy for a standalone \textsc{bert} is higher than what \textcite{mtb} report; we suspect this is due to our removal of the randomly initialized post-processing layer.
+
+The top half of Table~\ref{tab:graph:quantitative} reports results for nonparametric models.
+These models were not trained for the relation extraction task; they simply exploit an \textsc{mlm}-pretrained \textsc{bert} in clever ways.
+As we can see, while topological features are a bit less expressive for extracting relations by themselves, they still contain additional information that can be used jointly with linguistic features---this is what the nonparametric \textsc{wl} model does.
+
+For parametric models, we have difficulties training on \textsc{t-re}x because of its relatively small size.
+In practice, 66.89\% of FewRel entities are already mentioned in \textsc{t-re}x.
+However, a standard 5-way 1-shot problem contains \((1+5)\times 2=12\) different entities.
+We measure the empirical probability that all entities of a few-shot problem are connected in \textsc{t-re}x to be around 0.54\%.
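As an illustration of how such an episode-level statistic can be obtained, the following sketch estimates by sampling the fraction of episodes whose entities all appear in a given entity set, a rough proxy for being connected in the graph; the episode sampler and the entity identifiers are illustrative stand-ins for FewRel episodes and the \textsc{t-re}x entity set, not the actual data.
\begin{verbatim}
import random

def estimate_covered_episode_rate(episodes, graph_entities,
                                  n_samples=10_000, seed=0):
    """Fraction of sampled episodes whose entities all appear in the graph."""
    rng = random.Random(seed)
    hits = 0
    for _ in range(n_samples):
        episode = rng.choice(episodes)  # one 5-way 1-shot episode
        entities = {e for sample in episode for e in sample}
        hits += all(e in graph_entities for e in entities)
    return hits / n_samples

# Toy usage: an episode is a list of (e1, e2) pairs, 6 samples = 12 entity slots.
episodes = [[("Q1", "Q2"), ("Q3", "Q4"), ("Q5", "Q6"),
             ("Q7", "Q8"), ("Q9", "Q10"), ("Q11", "Q12")]]
graph_entities = {f"Q{i}" for i in range(1, 13)}
print(estimate_covered_episode_rate(episodes, graph_entities))
\end{verbatim}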
+Furthermore, we observe that \textsc{mtb} augmented with a \textsc{gcn} performs worse than a standalone \textsc{mtb} despite adding a single linear layer to the parameters (the \bertcoder{} of the linguistic and topological distances are shared). +These are still preliminary results, however, it seems the small size of \textsc{t-re}x coupled with the large amount of additional information presented to the model cause it to overfit on the train data. +We observe a similar problem with the triplet loss model of Section~\ref{sec:graph:refining}. +At the time of writing, our current plan is to attempt training on a larger graph, similar to the unsupervised dataset of \textcite{mtb}. diff --git a/mainmatter/graph/graph convolution parallel.tex b/mainmatter/graph/graph convolution parallel.tex @@ -0,0 +1,61 @@ +\begin{tikzpicture}[ + a/.style={fill=Dark2-A}, + b/.style={fill=Dark2-B}, + c/.style={fill=Dark2-C}] +\matrix[name=cnn, matrix of nodes, column sep=2mm, row sep=2mm, anchor=east] at (-1mm, 0) { + \node[a] (c11) {}; & \node[b] (c12) {}; & \node[c] (c13) {}; & \node[c] (c14) {}; & \node[a] (c15) {}; \\ + \node[a] (c21) {}; & \node[c] (c22) {}; & \node[c] (c23) {}; & \node[c] (c24) {}; & \node[a] (c25) {}; \\ + \node[b] (c31) {}; & \node[b] (c32) {}; & \node[a] (c33) {}; & \node[b] (c34) {}; & \node[b] (c35) {}; \\ + \node[b] (c41) {}; & \node[a] (c42) {}; & \node[b] (c43) {}; & \node[c] (c44) {}; & \node[a] (c45) {}; \\ + \node[b] (c51) {}; & \node[a] (c52) {}; & \node[a] (c53) {}; & \node[a] (c54) {}; & \node[a] (c55) {}; \\ +}; + +\foreach \y in {1,...,5}{ + \foreach \x in {1,...,5}{ + \pgfmathsetmacro{\xnext}{int(\x+1)} + \pgfmathsetmacro{\ynext}{int(\y+1)} + \ifnum\y<5 \draw (c\y\x) -- (c\ynext\x); \fi + \ifnum\x<5 + \draw (c\y\x) -- (c\y\xnext); + \ifnum\y<5 + \draw (c\y\x) -- (c\ynext\xnext); + \draw (c\ynext\x) -- (c\y\xnext); + \fi + \fi + } +} + +\draw[thick] (0, -1) -- (0, 1); + +\node[a] (g1) at (03mm, 0mm) {}; +\node[b] (g2) at (05mm, 4mm) {}; +\node[a] (g3) at (11mm, -5mm) {}; +\node[c] (g4) at (04mm, -6mm) {}; +\node[b] (g5) at (13mm, 1mm) {}; +\node[c] (g6) at (08mm, -1mm) {}; +\node[a] (g7) at (16mm, 9mm) {}; +\node[b] (g8) at (11mm, 8mm) {}; +\node[c] (g9) at (15mm, -7mm) {}; +\node[b] (gA) at (18mm, 2mm) {}; +\node[a] (gB) at (22mm, 4mm) {}; + +\draw (g1) -- (g2); +\draw (g1) -- (g4); +\draw (g1) -- (g6); +\draw (g2) -- (g5); +\draw (g2) -- (g6); +\draw (g2) -- (g8); +\draw (g3) -- (g4); +\draw (g3) -- (g9); +\draw (g3) -- (g5); +\draw (g3) -- (g6); +\draw (g5) -- (g6); +\draw (g5) -- (g7); +\draw (g5) -- (g8); +\draw (g5) -- (gA); +\draw (g7) -- (g8); +\draw (g7) -- (gA); +\draw (g7) -- (gB); +\draw (gA) -- (gB); + +\end{tikzpicture} diff --git a/mainmatter/graph/introduction.tex b/mainmatter/graph/introduction.tex @@ -0,0 +1,37 @@ +As we showcase in the last chapter, the relational semantics we are trying to model is challenging to capture in an unsupervised fashion. +The information available in each sentence is scarce. +To alleviate this problem, we can take a holistic approach by explicitly modeling the relational information at the dataset level, similarly to the aggregate approaches discussed in Section~\ref{sec:relation extraction:aggregate}. +The information encoded in the structure of the dataset can be modeled using a graph \parencitex{graphie}. +In this chapter, we propose a graph-based unsupervised aggregate relation extraction method to exploit the signal in the dataset structure explicitly. 
+
+Since we model dataset-level information, we need to place ourselves in the aggregate setup (Section~\ref{sec:relation extraction:definition}) as defined by Equation~\ref{eq:relation extraction:aggregate definition}.
+As a reminder, the aggregate setup is in opposition to the sentential setup used in the previous chapter.
+In the sentential setup, we process sentences independently.
+In contrast, in the aggregate setup, we consider all the samples \(\dataSet\subseteq\sentenceSet\times\entitySet\) jointly to extract knowledge base facts \(\kbSet\subseteq\entitySet\times\relationSet\), without necessarily mapping each individual sample to a fact.
+We already introduced two aggregate supervised relation extraction approaches relying on graph modeling: label propagation (Section~\ref{sec:relation extraction:label propagation}) and \textsc{epgnn} (Section~\ref{sec:relation extraction:epgnn}).
+The latter uses a spectral graph convolutional network (\textsc{gcn}).
+\textsc{gcn}s are the main contribution of a recent resurgence of interest in graph-based approaches through the use of deep learning methods.
+It has been shown that these methods share some similarities with the Weisfeiler--Leman isomorphism test \parencitex{gcn_spectral_semi}.
+A graph isomorphism test attempts to decide whether two graphs are isomorphic, that is, identical up to a relabeling of their vertices.
+To this end, it assigns a color to each element, classifying it according to its neighborhood.
+Coupled with the assumption that sentences conveying similar relations have similar neighborhoods, this closely relates the isomorphism problem to unsupervised relation extraction.
+However, unsupervised \textsc{gcn}s are usually trained by assuming that neighboring samples have similar representations, completely discarding the characteristic of the Weisfeiler--Leman algorithm that makes it interesting from a relation extraction point of view.
+In this chapter, we propose alternative training objectives for unsupervised graph neural networks for relation extraction.
+
+In Section~\ref{sec:graph:encoding}, we see how to extend the definition of a simple graph to model a relation extraction problem.
+We then provide some statistics on the \textsc{t-re}x dataset in Section~\ref{sec:graph:analysis}.
+The results support the claim that a large amount of information can be leveraged from topological features for the relation extraction problem.
+In Section~\ref{sec:graph:related work}, we take a quick tour of graph neural networks (\textsc{gnn}) and the Weisfeiler--Leman isomorphism test.
+Most \textsc{gnn}s apply to simple undirected graphs, whereas we need a more complex structure to encode the relation extraction task.
+While most models, such as \textsc{epgnn}, try to adapt the encoding of relation extraction to simple undirected graphs, in Section~\ref{sec:graph:approach}, we propose to adapt existing \textsc{gnn} methods to the richer structure needed to fully capture the relation extraction problem.
+Finally, Section~\ref{sec:graph:experiments} presents the experimental results of the proposed approaches.
+
+\paragraph{Notations used in this chapter.}
+A simple undirected graph is defined as a tuple \(G=(V, E)\) where \(V\) is a set of \(n\) vertices and \(E\) is a set of \(m\) edges.%
+\sidenote{
+    In a simple graph, we always have \(m\leq n (n-1)\), which tightens to \(m\leq n (n-1) \divslash 2\) for undirected ones.
+}
+An edge \(\{u, v\}\in E\) connects two vertices \(u, v\in V\), which are then said to be \emph{neighbors}.
+We use \(\gfneighbors: V\to 2^V\) to denote the function which associates to each vertex the set of its neighbors \(\gfneighbors(u)=\{v\in V\mid \exists \{u, v\}\in E\}\). +Alternatively, a graph \(G\) can be represented by its adjacency matrix \(\mtrx{M}\in\{0, 1\}^{n\times n}\), with \(m_{uv}=1\) if \(\{u, v\}\in E\) and \(m_{uv}=0\) otherwise. +A graph is said to encode an adjacency relation on its vertices, which foreshadows the remainder of this chapter. diff --git a/mainmatter/graph/isomorphism.tex b/mainmatter/graph/isomorphism.tex @@ -0,0 +1,52 @@ +\begin{tikzpicture}[ + gn/.style={circle,line width=1mm,inner sep=0.2mm, minimum width=4.5mm,anchor=center}, + gn1/.style={gn, draw=Dark2-A}, + gn2/.style={gn, draw=Dark2-B}, + gn3/.style={gn, draw=Dark2-C}, + gn4/.style={gn, draw=Dark2-D}, + gn5/.style={gn, draw=Dark2-E}, + gn6/.style={gn, draw=Dark2-F}, + gn7/.style={gn, draw=Dark2-G}, + gn8/.style={gn, draw=Dark2-H}, + ] + + \matrix[name=bipartite, matrix of nodes, column sep=5mm, row sep=4mm, anchor=east] at (-1mm, 0) { + \node[gn1] (b1) {\(1\)}; & \node[gn2] (b2) {\(2\)}; \\ + \node[gn3] (b3) {\(3\)}; & \node[gn4] (b4) {\(4\)}; \\ + \node[gn5] (b5) {\(5\)}; & \node[gn6] (b6) {\(6\)}; \\ + \node[gn7] (b7) {\(7\)}; & \node[gn8] (b8) {\(8\)}; \\ + }; + + \draw (b1) -- (b2); + \draw (b1) -- (b4); + \draw (b1) -- (b6); + \draw (b3) -- (b2); + \draw (b3) -- (b4); + \draw (b3) -- (b8); + \draw (b5) -- (b2); + \draw (b5) -- (b6); + \draw (b5) -- (b8); + \draw (b7) -- (b4); + \draw (b7) -- (b6); + \draw (b7) -- (b8); + + \matrix[name=square, matrix of nodes, column sep=2mm, row sep=2mm, anchor=west] at (1mm, 0) { + \node[gn1] (s1) {\(a\)}; & & & \node[gn4] (s4) {\(d\)}; \\ + & \node[gn2] (s2) {\(b\)}; & \node[gn3] (s3) {\(c\)}; & \\ + & \node[gn5] (s5) {\(e\)}; & \node[gn8] (s8) {\(h\)}; & \\ + \node[gn6] (s6) {\(f\)}; & & & \node[gn7] (s7) {\(g\)}; \\ + }; + + \draw (s1) -- (s2); + \draw (s1) -- (s4); + \draw (s1) -- (s6); + \draw (s3) -- (s2); + \draw (s3) -- (s4); + \draw (s3) -- (s8); + \draw (s5) -- (s2); + \draw (s5) -- (s6); + \draw (s5) -- (s8); + \draw (s7) -- (s4); + \draw (s7) -- (s6); + \draw (s7) -- (s8); +\end{tikzpicture}% diff --git a/mainmatter/graph/line graph.tex b/mainmatter/graph/line graph.tex @@ -0,0 +1,59 @@ +\kern2mm% +\begin{tikzpicture}[ + gn/.style={circle,line width=1mm,inner sep=0.3mm, minimum width=5mm,anchor=center,draw=black!50}, + gn1/.style={gn, draw=Dark2-A}, + gn2/.style={gn, draw=Dark2-B}, + gn3/.style={gn, draw=Dark2-C}, + gn4/.style={gn, draw=Dark2-D}, + gn5/.style={gn, draw=Dark2-E}, + gn6/.style={gn, draw=Dark2-F}, + gn7/.style={gn, draw=Dark2-G}, + gn8/.style={gn, draw=Dark2-H}, +] + +\node[gn] (ga) at (0, 1.5) {\(a\)}; +\node[gn] (gb) at (1, 0.5) {\(b\)}; +\node[gn] (gc) at (2, 1.5) {\(c\)}; +\node[gn] (gd) at (3, 2.5) {\(d\)}; +\node[gn] (ge) at (3, 0.5) {\(e\)}; +\node[gn] (gf) at (4, 1.5) {\(f\)}; + +\draw[ultra thick, Dark2-A] (ga) -- (gb); +\draw[ultra thick, Dark2-B] (ga) -- (gc); +\draw[ultra thick, Dark2-C] (gb) -- (gc); +\draw[ultra thick, Dark2-D] (gc) -- (gd); +\draw[ultra thick, Dark2-E] (gc) -- (ge); +\draw[ultra thick, Dark2-F] (gc) -- (gf); +\draw[ultra thick, Dark2-G] (gd) -- (gf); +\draw[ultra thick, Dark2-H] (ge) -- (gf); + +\node[gn1] (lab) at (0 , -3) {\(ab\)}; +\node[gn2] (lac) at (0.72, -2) {\(ac\)}; +\node[gn3] (lbc) at (1.44, -3) {\(bc\)}; +\node[gn4] (lcd) at (2.88, -1) {\(cd\)}; +\node[gn5] (lce) at (2.88, -3) {\(ce\)}; +\node[gn6] (lcf) at (3.6 , -2) {\(cf\)}; +\node[gn7] (ldf) at (4.32, -1) {\(df\)}; +\node[gn8] (lef) 
at (4.32, -3) {\(ef\)}; + +\draw (lab) -- (lac); +\draw (lab) -- (lbc); +\draw (lac) -- (lbc); +\draw (lac) -- (lcd); +\draw (lac) -- (lce); +\draw (lac) -- (lcf); +\draw (lbc) -- (lcd); +\draw (lbc) -- (lce); +\draw (lbc) -- (lcf); +\draw (lcd) -- (lce); +\draw (lcd) -- (lcf); +\draw (lcd) -- (ldf); +\draw (lce) -- (lcf); +\draw (lce) -- (lef); +\draw (lcf) -- (ldf); +\draw (lcf) -- (lef); +\draw (ldf) -- (lef); + +\node[anchor=west] at (0, 2.5) {\normalsize \(G\):}; +\node[anchor=west] at (0, -1) {\normalsize \(L(G)\):}; +\end{tikzpicture} diff --git a/mainmatter/graph/nell bipartite.tex b/mainmatter/graph/nell bipartite.tex @@ -0,0 +1,33 @@ +\begin{tikzpicture}[ + entity/.style={}, + relation/.style={anchor=base east}] +\matrix[name=entities, matrix of nodes, row sep=1mm, anchor=west, column 1/.style={anchor=base west}] at (-2.4, 0) { + \node[entity] (Italy) {Italy}; \\ + \node[entity] (Peru) {Peru}; \\ + \node[entity] (Kenya) {Kenya}; \\ + \node[entity] (Rome) {Rome}; \\ + \node[entity] (Lima) {Lima}; \\ + \node[entity] (Nairobi) {Nairobi}; \\ + \node[entity] (Brejar) {Samuel Brejar}; \\ + \node[entity] (Gracchus) {Tiberius Gracchus}; \\ +}; + +\matrix[name=relations, matrix of nodes, row sep=3mm, anchor=east] at (2.4, 0) { + \node[relation] (capital of e2) {\textsl{capital of} \(e_2\)}; \\ + \node[relation] (e1 capital of) {\(e_1\) \textsl{capital of}}; \\ + \node[relation] (born in e2) {\textsl{born in} \(e_2\)}; \\ + \node[relation] (e1 born in) {\(e_1\) \textsl{born in}}; \\ +}; + + \draw (Rome.east) -- (e1 capital of.west); + \draw (Lima.east) -- (e1 capital of.west); + \draw (Nairobi.east) -- (e1 capital of.west); + \draw (Italy.east) -- (capital of e2.west); + \draw (Peru.east) -- (capital of e2.west); + \draw (Kenya.east) -- (capital of e2.west); + \draw (Brejar.east) -- (e1 born in.west); + \draw (Gracchus.east) -- (e1 born in.west); + \draw (Rome.east) -- (born in e2.west); + \draw (Lima.east) -- (born in e2.west); + \draw (Nairobi.east) -- (born in e2.west); +\end{tikzpicture} diff --git a/mainmatter/graph/path counting.tex b/mainmatter/graph/path counting.tex @@ -0,0 +1,39 @@ +\begin{algorithmic} + \Function{Path counting}{} + \FunctionInputs{} \(G=(\entitySet, \arcSet, \gfendpoints, \gfrelation, \gfsentence)\) relation multigraph + \FunctionInputs*{} \(k\) paths length + \FunctionOutput{} \(C\) relation paths counter + \State + \LComment{Initialization} + \State \(C \gets \text{new counter}\ \relationSet^k \to \symbb{R}\ \text{initialized at 0}\) + \LComment{Main Loop} + \Loop + \LComment{Initialize the importance weight with \(\symcal{W}^k\)} + \State \(w \gets \left(\symbf{1}\transpose \mtrx{M}^k \symbf{1}\right)^{-1}\) + \Comment{\(\mtrx{M}\) is the adjacency matrix} + \State Initialize empty walk \(\vctr{a}=()\) + \State Sample \(v\sim \uniformDistribution(\entitySet)\) + \State \(w \gets n \times w\) + \Comment{Update \(w\) following the sampling of \(v\)} + \For{\(i=1,\dotsc,k\)} + \State Sample \(x\sim\uniformDistribution(\gfincidents(v))\) + \State \(w \gets w \times \gfdegree(v)\) + \Comment{Accumulate \(1\divslash\symcal{F}^k\)} + \If{\(\gfsource(x) = v\)} + \Comment{Continue with \(\gfendpoints(x)\setminus\{v\}\)} + \State Append \(x\) to \(\vctr{a}\) + \State \(v\gets \gftarget(x)\) + \Else + \State Append \(\breve{x}\) to \(\vctr{a}\) + \State \(v\gets \gfsource(x)\) + \EndIf + \EndFor + \If{\(\vctr{a}\) is a path} + \State \(\vctr{r} \gets (\gfrelation(a_i))_{1\leq i\leq k}\) + \Comment{Take the relations of \(\vctr{a}\)} + \State \(C[\vctr{r}] \gets 
C[\vctr{r}] + w\) + \EndIf + \EndLoop + \State \Output \(C\) + \EndFunction +\end{algorithmic} diff --git a/mainmatter/graph/path graph.tex b/mainmatter/graph/path graph.tex @@ -0,0 +1,9 @@ +\begin{tikzpicture}[baseline=(n2.south), c/.style={regular polygon,regular polygon sides=4,minimum width=1.5mm,inner sep=0mm}] + \node[c,fill=Dark2-A] (n1) at (0, 0) {}; + \node[c,fill=Dark2-A] (n2) at (0.25, 0) {}; + \node[c,fill=Dark2-B] (n3) at (0.5, 0) {}; + \node[c,fill=Dark2-C] (n4) at (0.75, 0) {}; + \draw (n1) -- (n2) -- (n3) -- (n4); + \node[anchor=east,left=-1mm of n1.west] {\(\ldots\)}; + \node[anchor=west,right=-1mm of n4.east] {\(\ldots\)}; +\end{tikzpicture} diff --git a/mainmatter/graph/paths frequencies.tex b/mainmatter/graph/paths frequencies.tex @@ -0,0 +1,16 @@ +\raisebox{-20mm}[4.5mm][\totalheight-4.5mm]{% +\begin{tabular}{@{}r c c@{}} + \toprule + \multirow{2}{*}{Frequency} & \multicolumn{2}{c}{Relation path} \\ + \cmidrule(lr){2-3} + & Surface forms & Identifiers \\ + \midrule + 54.657‰ & \(\textsl{country} \relationComposition \textsl{diplomatic relation} \relationComposition \widebreve{\textsl{country}}\) & \(\wdrel{17} \relationComposition \wdrel{530} \relationComposition \Pwidebreve{\wdrel{17}}\) \\ + 31.696‰ & \(\textsl{country} \relationComposition \textsl{diplomatic relation} \relationComposition \widebreve{\textsl{citizen of}}\) & \(\wdrel{17} \relationComposition \wdrel{530} \relationComposition \Pwidebreve{\wdrel{27}}\) \\ + 6.680‰ & \(\textsl{country} \relationComposition \textsl{shares border with} \relationComposition \widebreve{\textsl{citizen of}}\) & \(\wdrel{17} \relationComposition \wdrel{47} \relationComposition \Pwidebreve{\wdrel{27}}\) \\ + 0.013‰ & \(\textsl{country} \relationComposition \textsl{seceded from} \relationComposition \widebreve{\textsl{citizen of}}\) & \(\wdrel{17} \relationComposition \wdrel{807} \relationComposition \Pwidebreve{\wdrel{27}}\) \\ + 9.445‰ & \(\textsl{sport} \relationComposition \widebreve{\textsl{sport}} \relationComposition \widebreve{\textsl{member of}_\textsc{st}}\) & \(\wdrel{641} \relationComposition \Pwidebreve{\wdrel{641}} \relationComposition \Pwidebreve{\wdrel{54}}\) \\ + 10\rlap{\({}^{-6}\)}\hphantom{.000}‰ & \(\textsl{sport} \relationComposition \widebreve{\textsl{industry}} \relationComposition \widebreve{\textsl{member of}_\textsc{st}}\) & \(\wdrel{641} \relationComposition \Pwidebreve{\wdrel{452}} \relationComposition \Pwidebreve{\wdrel{54}}\) \\ + \bottomrule +\end{tabular}% +}% diff --git a/mainmatter/graph/quantitative.tex b/mainmatter/graph/quantitative.tex @@ -0,0 +1,12 @@ +\begin{tabular}{l r} +\toprule + Model & Accuracy \\ +\midrule + Linguistic (\textsc{bert}) & 69.46 \\ + Topological (\(W_1\)) & 65.75 \\ + Nonparametric \textsc{wl} & 72.18 \\ +\midrule + \textsc{mtb} & 78.83 \\ + \textsc{mtb gcn}--Chebyshev & 76.10 \\ +\bottomrule +\end{tabular} diff --git a/mainmatter/graph/related work.tex b/mainmatter/graph/related work.tex @@ -0,0 +1,498 @@ +\section{Related Work} +\label{sec:graph:related work} +In the previous section, we show that the attributed multigraph encoding we introduced in Section~\ref{sec:graph:encoding} can help us leverage additional information for the relation extraction task. +In this section, we present the existing framework for computing distributed representations of graphs. +In most cases, these process simple undirected graphs \(G=(V, E)\). 
+Still, these methods are applicable to our relation extraction multigraph with some modifications, as shown in Sections~\ref{sec:graph:r-gcn} and~\ref{sec:graph:approach}. + +The use of graphs in deep learning has seen a recent surge of interest over the last few years. +This produced a set of models known as graph neural networks (\textsc{gnn}) and graph convolutional networks (\textsc{gcn}).% +\sidenote{ + The term \textsc{gcn} is used with different meanings by various authors. + \textsc{gcn}s are always \textsc{gnn}s, but the reverse is not true. + However, in practice, the \textsc{gnn}s we describe in this section can essentially be described as \textsc{gcn}s. + We use the term \textsc{gcn} to describe models whose purpose is to have a similar function on graphs as \textsc{cnn}s have on images. + Some authors only refer to the model of \textcite{gcn_spectral_semi} described in Section~\ref{sec:graph:spectral gcn} as a \textsc{gcn}. + In this case, what we call \textsc{gcn} can be called conv\textsc{gnn} (convolutional graph neural networks). + In any case, \textsc{gnn} and \textsc{gcn} can be considered almost synonymous for the purpose of this thesis since we don't describe any exotic \textsc{gnn} which clearly falls outside of the realm of \textsc{gcn}. + \label{note:graph:gcn vs gnn} +} +While the first works on \textsc{gnn} started more than twenty years ago \parencite{gnn_early}, we won't go into a detailed historical review, and we exclusively focus on recent models. +Note that we already presented an older graph-based approach in Section~\ref{sec:relation extraction:label propagation}, the label propagation algorithm. +We also discussed \textsc{epgnn} in Section~\ref{sec:relation extraction:epgnn}, which is a model built on top of a \textsc{gcn}. +We further draw parallels between \textsc{epgnn} and our proposed approach in Section~\ref{sec:graph:topological features}. + +The thread of reasoning behind this section is as follows: +\begin{itemize}[nosep] + \item We present the ``usual'' way to process graphs (Sections~\ref{sec:graph:random walk}--\ref{sec:graph:r-gcn}). + \item We present the theory behind these methods (Section~\ref{sec:graph:weisfeiler-leman}). + \item We show how this theoretical background can help us design a new approach specific to the unsupervised relation extraction task (Section~\ref{sec:graph:approach}). +\end{itemize} +In this related work overview, we mainly describe algorithms working on standard \(G=(V, E)\) graphs, not the labeled multigraphs of Section~\ref{sec:graph:encoding}, with the exception of Section~\ref{sec:graph:r-gcn}. +We start by quickly describing models based on random walks in Section~\ref{sec:graph:random walk}; these are spatial methods which serve as a gentle introduction to the manipulation of graphs by neural networks. +Furthermore, they were influential in the development of subsequent models and in our preliminary analysis with computation of path statistics (Section~\ref{sec:graph:analysis}), which allows us to draw parallels with more modern approaches. +We then introduce the two main classes of \textsc{gcn}---which consequently are also the two main classes of \textsc{gnn}---used nowadays: spectral (Section~\ref{sec:graph:spectral gcn}) and spatial (Section~\ref{sec:graph:spatial gcn}). +Apart from the few works mentioned in Chapter~\ref{chap:relation extraction}, \textsc{gnn}s were seldom used for relation extraction. 
+We, therefore, focus on the evaluation of \textsc{gnn} on an entity classification task, which, while different from our problem, works on similar data.
+In Section~\ref{sec:graph:r-gcn}, we describe models designed to handle relational data in a knowledge base, in particular \textsc{r-gcn}.
+We close this related work with a presentation of the Weisfeiler--Leman isomorphism test in Section~\ref{sec:graph:weisfeiler-leman}; it serves as a theoretical motivation behind both \textsc{gcn}s and our proposed approach.
+
+\subsection{Random-Walk-Based Models}
+\label{sec:graph:random walk}
+DeepWalk \parencitex{deepwalk} is a method to learn vertex representations from the structure of the graph alone.
+The representations encode how likely it is for two vertices to be close to each other in the graph.
+To this end, DeepWalk models the likelihood of random walks in the graph (Section~\ref{sec:graph:analysis}).
+These walks are simply sequences of vertices.
+To obtain a distributed representation out of them, we can use the \textsc{nlp} approaches of Sections~\ref{sec:context:word} and~\ref{sec:context:sentence} by treating the set of vertices as the vocabulary \(V=\entitySet\).
+In particular, DeepWalk uses the skip-gram model of Word2vec (Section~\ref{sec:context:skip-gram}), using hierarchical softmax to approximate the partition function over all words---i.e.~vertices.
+Vertices that are part of the same random walk are used as positive examples.
+In the same way that learning representations to predict the neighborhood of a word gives good word representations, modeling the neighborhood of a vertex gives good vertex representations.
+
+\Textcite{deepwalk} evaluate their model on a node classification task.
+For example, one of the datasets they use is BlogCatalog \parencite{blogcatalog}, where vertices correspond to blogs, edges are built from social network connections between the various bloggers, and predicted labels are the set of topics on which each blog focuses.
+DeepWalk is a transductive method but was extended into an inductive approach called planetoid \parencitex{planetoid}.
+Planetoid also proposes an evaluation on an entity classification task performed on the \textsc{nell} dataset.
+The goal of this task is to find the type of an entity (e.g.~person, organization, location\dots) in a knowledge base (Section~\ref{sec:context:knowledge base}).
+To this end, a special bipartite%
+\sidenote{
+    A bipartite graph is a graph \(G=(V, E)\) where the vertices can be split into two disjoint sets \(V_1\cup V_2=V\) such that all edges \(e\in E\) have one endpoint in \(V_1\) and one endpoint in \(V_2\).
+}
+graph \(G_\textsc{b} = (V_\textsc{b}, E_\textsc{b})\) is constructed where \(V_\textsc{b}=\entitySet\cup\relationSet\) and:
+\begin{equation*}
+    E_\textsc{b} = \big\{\, \{e, r\}\subseteq V_\textsc{b} \mathrel{\big|} \exists e'\in\entitySet : (e, r, e') \in\kbSet\lor (e', r, e)\in\kbSet \,\big\}.
+\end{equation*}
+This clearly assumes \hypothesis{biclique}: for each relation, the information of ``which \(e_1\)'' corresponds to ``which \(e_2\)'' is discarded.
+However, this information is not as crucial for entity classification as it is for relation extraction.
+A small example of a graph \(G_\textsc{b}\) obtained this way is given in Figure~\ref{fig:graph:nell bipartite}.
+The model is trained by jointly optimizing the negative sampling loss and the log-likelihood of labeled examples.
+On unseen entities, planetoid reaches an accuracy of 61.9\% when only 0.1\% of entities are labeled.
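To make the construction of \(E_\textsc{b}\) concrete, the following sketch builds the bipartite graph from a toy knowledge base, following the relation-slot presentation of Figure~\ref{fig:graph:nell bipartite}; the facts and names are illustrative, not the actual \textsc{nell} data.
\begin{verbatim}
def bipartite_graph(kb):
    """Build the edge set of the bipartite graph linking entities to the
    relation slots they fill, one slot per side of each relation."""
    edges = set()
    for e1, r, e2 in kb:
        edges.add((e1, ("e1 slot", r)))  # e1 fills the left slot of r
        edges.add((e2, ("e2 slot", r)))  # e2 fills the right slot of r
    return edges

# Toy knowledge base: a set of (e1, relation, e2) facts.
kb = {
    ("Rome", "capital of", "Italy"),
    ("Lima", "capital of", "Peru"),
    ("Tiberius Gracchus", "born in", "Rome"),
}
for entity, slot in sorted(bipartite_graph(kb)):
    print(entity, "--", slot)
\end{verbatim}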
+ +\begin{marginfigure} + \centering + \input{mainmatter/graph/nell bipartite.tex} + \scaption[\textsc{nell} dataset bipartite graph.]{ + \textsc{nell} dataset bipartite graph. + Entities are on the left, while relation slots are on the right. + In this graph, the edges are left unlabeled. + \label{fig:graph:nell bipartite} + } +\end{marginfigure} + +Using random walks allows DeepWalk and planetoid to leverage the pre-existing \textsc{nlp} literature. +However, for each sample, only a small fraction of the neighborhood---two neighbors at most---of each node is considered to make a prediction. +Subsequent methods focused on modeling the information of the whole neighborhood jointly. + +\subsection{Spectral \textsc{gcn}} +\label{sec:graph:spectral gcn} +The first approaches to successfully model the neighborhood of vertices jointly were based on spectral graph theory \parencite{gcn_spectral_early}. +In practice, this means that the graph is manipulated through its Laplacian matrix instead of directly through the adjacency matrix. +In this section, we base our presentation of spectral methods on the work of \textcitex{gcn_spectral_semi}[-11mm]. + +\begin{marginparagraph}[-3mm] + The graph Laplacian is similar to the standard Laplacian measuring the divergence of the gradient (\(\laplace = \nabla^2\)) of scalar functions. + Except that the graph gradient is an operator mapping a function on vertices to a function on edges: + \begin{equation*} + (\nabla \vctr{f})_{ij} = f_i - f_j + \end{equation*} + And that the graph divergence is an operator mapping a function on edges to a function on vertices: + \begin{equation*} + (\operatorname{div} \mtrx{G})_i = \sum_{j\in V} m_{ij} g_{ij} + \end{equation*} + Given these definitions, the graph Laplacian is defined as \(\laplace = - \operatorname{div} \nabla\). + Applying \(\laplace\) to a signal \(\vctr{x}\in\symbb{R}^n\) is equivalent to multiplying this signal by \(\laplacian{c}\) as defined in Equation~\ref{eq:graph:laplacian}: \(\laplace \vctr{x} = \laplacian{c} \vctr{x}\). +\end{marginparagraph} + +We start by introducing some basic concepts from spectral graph theory used to define the convolution operator on graphs. +The Laplacian of an undirected graph \(G=(V, E)\) can be defined as: +\begin{equation} + \laplacian{c} = \mtrx{D} - \mtrx{M}, + \label{eq:graph:laplacian} +\end{equation} +where \(\mtrx{D}\in\symbb{R}^{n\times n}\) is the diagonal matrix of vertex degrees \(d_{ii} = \gfdegree(v_i)\) and \(\mtrx{M}\in\symbb{R}^{n\times n}\) is the adjacency matrix. +Equation~\ref{eq:graph:laplacian} defines the combinatorial Laplacian; however, spectral \textsc{gcn}s are usually defined on the normalized symmetric Laplacian: +\begin{equation*} + \laplacian{sym} = \mtrx{D}^{-1\fracslash 2} \laplacian{c} \mtrx{D}^{-1\fracslash 2} = \mtrx{I} - \mtrx{D}^{-1\fracslash 2} \mtrx{M} \mtrx{D}^{-1\fracslash 2}. +\end{equation*} + +Using this definition, we can then take the eigendecomposition of the Laplacian \(\laplacian{sym}=\mtrx{U}\mtrx{\Lambda}\mtrx{U}^{-1}\), where \(\mtrx{\Lambda}\) is the ordered spectrum---the diagonal matrix of eigenvalues sorted in increasing order---and \(\mtrx{U}\) is the matrix of normalized eigenvectors. +For an undirected graph, the matrix \(\mtrx{M}\) is symmetric, therefore \(\mtrx{U}\) is orthogonal. +The orthonormal space formed by the normalized eigenvectors is the Fourier space of the graph. 
+In other words, we can define the graph Fourier transform of a signal \(\vctr{x}\in\symbb{R}^V\) as: +\begin{marginparagraph}[-1cm] + The expansion of signals in terms of eigenfunctions of the Laplace operator is the leading parallel between the graph Fourier transform and the classical Fourier transform on \(\symbb{R}\) \parencite{graph_fourier}. + In \(\symbb{R}\), the eigenfunctions \(\xi\mapsto e^{2\pi i\xi x}\) correspond to low frequencies when \(x\) is small. + In the same way, the eigenvectors of the graph Laplacian associated with small eigenvalues assign similar values to neighboring vertices. + In particular the eigenvector associated with the eigenvalue 0 is constant with value \(1\divslash\sqrt{n}\). + On the other hand, eigenvectors associated with large eigenvalues correspond to high frequencies and encode larger changes of value between neighboring vertices. +\end{marginparagraph} +\begin{equation*} + \gffourier(\vctr{x}) = \mtrx{U}\transpose \vctr{x}. +\end{equation*} +Furthermore since the induced space is orthogonal, the inverse Fourier transform is simply defined as: +\begin{equation*} + \gfinvfourier(\vctr{x}) = \mtrx{U} \vctr{x}. +\end{equation*} +Having defined the Fourier transform on graphs, we can use the definition of convolutions as multiplications in the Fourier domain to define convolution on graphs: +\begin{equation} + \vctr{x} * \vctr{w} = \gfinvfourier(\gffourier(\vctr{x}) \odot \gffourier(\vctr{w})), + \label{eq:graph:convolution} +\end{equation} +where \(\odot\) denotes the Hadamard (element-wise) product. +Note that the convolution operator implicitly depends on the graph \(G\) since \(\mtrx{U}\) is defined from the adjacency matrix \(\mtrx{M}\). +The signal \(\vctr{w}\) in Equation~\ref{eq:graph:convolution} has the same function as the parametrized filter of \textsc{cnn} (Equation~\ref{eq:context:convolution}). +Instead of learning \(\vctr{w}\) in the spatial domain, we can directly parametrize its Fourier transform \(\vctr{w}_\vctr{\theta} = \diagonal(\gffourier(\vctr{w}))\), % +\begin{marginparagraph} + \(\diagonal(\vctr{x})\) is the diagonal matrix with values of the vector \(\vctr{x}\) along its diagonal. +\end{marginparagraph} +simplifying Equation~\ref{eq:graph:convolution} into: +\begin{equation} + % ( ͡° ͜ʖ ͡°) UwU + \vctr{x} * \vctr{w}_\vctr{\theta} = \mtrx{U} \vctr{w}_\vctr{\theta} \mtrx{U}\transpose \vctr{x}. + \label{eq:graph:filter convolution} +\end{equation} +While \(\vctr{w}_\vctr{\theta}\) could be learned directly \parencite{gcn_spectral_early}, \textcite{chebnet} propose to approximate it by Chebyshev polynomials of the first kind (\(T_k\)) of the spectrum \(\mtrx{\Lambda}\): +\begin{equation} + \vctr{w}_\vctr{\theta}(\mtrx{\Lambda}) = \sum_{k=0}^{K} \theta_k T_k(\mtrx{\Lambda}). + \label{eq:graph:filter decomposition} +\end{equation} +The rationale is that computing the eigendecomposition of the graph Laplacian is too computationally expensive. +The Chebyshev polynomials approximation is used to localize the filter; since the \(k\)-th Chebyshev polynomial is of degree \(k\), only values of vertices at a distance of at most \(k\) are needed.% +\sidenote[][-39mm]{ + The reasoning behind this localization is the same as the one underlying the fact that the \(k\)-th power of the adjacency matrix gives the number of walks of length \(k\) (Section~\ref{sec:graph:analysis}). 
+} +This is similar to how \textsc{cnn}s are usually computed; simple very localized filters are used instead of taking the Fourier transform of the whole input matrix to compute convolution with arbitrarily complex functions. +Chebyshev polynomials of the first kind are defined as: +\begin{marginparagraph}[-32mm] + Despite its appearance, Equation~\ref{eq:graph:chebyshev cos} defines a series of polynomials which can be obtained through the application of various trigonometric identities. + An alternative but equivalent definition is through the following recursion: + \begin{align*} + T_0(x) & = 1 \\ + T_1(x) & = x \\ + T_{k+1}(x) & = 2x T_k(x) - T_{k-1}(x) + \end{align*} + The plot of the first five Chebyshev polynomials of the first kind follows: + \input{mainmatter/graph/chebyshev.tex} +\end{marginparagraph} +\begin{equation} + T_k(\cos x) = \cos(k x). + \label{eq:graph:chebyshev cos} +\end{equation} +They form a sequence of orthogonal polynomials on the interval \([-1, 1]\) with respect to the weight \(1\divslash\sqrt{1-x^2}\), meaning that for \(k\neq k'\): +\begin{equation*} + \int_{-1}^1 T_k(x) T_{k'}(x) \frac{\diff x}{\sqrt{1-x^2}} = 0. +\end{equation*} + +The filter defined by Equation~\ref{eq:graph:filter decomposition} is \(K\)-localized, meaning that the value of the output signal on a vertex \(v\) is computed from the value of \(\vctr{x}\) on vertices at distance at most \(K\) of \(v\). +This can be seen by plugging Equation~\ref{eq:graph:filter decomposition} back into Equation~\ref{eq:graph:filter convolution}, noticing that it depends on the \(k\)-th power of the Laplacian and thus of the adjacency matrix.% +\sidenotemark% XXX No more space for sidenote on this page + +\leavevmode +\sidenotetext{% XXX No more space for sidenote on previous page + Derivation of the dependency on \(\laplacian{sym}^k\) for the proof of \(K\)-locality: + \begin{align*} + \vctr{x} * \vctr{w}_\vctr{\theta}(\mtrx{\Lambda}) + & = \mtrx{U} \left( \sum_{k=0}^{K} \theta_k T_k(\mtrx{\Lambda}) \right) \mtrx{U}\transpose \vctr{x} \\ + & = \left( \sum_{k=0}^{K} \theta_k \mtrx{U} T_k(\mtrx{\Lambda}) \mtrx{U}\transpose \right) \vctr{x} \\ + & = \left( \sum_{k=0}^{K} \theta_k T_k(\laplacian{sym}) \right) \vctr{x} + \end{align*} + For the last equality, notice that \(\laplacian{sym}^k = (\mtrx{U}\mtrx{\Lambda}\mtrx{U}\transpose)^k = \mtrx{U}\mtrx{\Lambda}^k\mtrx{U}\transpose\) since \(\mtrx{U}\) is orthogonal. + This can also be applied to the (diagonal) constant term. +}% +\Textcite{gcn_spectral_semi} proposed to use \(K=1\) with several further optimizations we won't delve into. +Using \(K=1\) means that their method computes the activation of a node only from its activation and the activations of its neighbors at the previous layer. +This makes the \textsc{gcn} of \textcite{gcn_spectral_semi} quite similar to spatial methods described in Section~\ref{sec:graph:spatial gcn}. +All the equations given thus far were for a single scalar signal; however, we usually work with vector representations for all nodes, \(\mtrx{X}\in\symbb{R}^{n\times d}\). +In this case, the layer \(\ell\) of a \textsc{gcn} can be described as: +\begin{equation*} + \mtrx{H}^{(\ell+1)} = \ReLU\left((\mtrx{D}+\mtrx{I})^{-1\fracslash2} (\mtrx{M}+\mtrx{I}) (\mtrx{D}+\mtrx{I})^{-1\fracslash2} \mtrx{H}^{(\ell)} \mtrx{\Theta}^{(\ell)}\right) +\end{equation*} +Where \(\mtrx{\Theta}\in\symbb{R}^{d\times d}\) is the parameter matrix. 
+Using \(\mtrx{H}^{(0)} = \mtrx{X}\), we can use a \textsc{gcn} with \(L\) layers to combine the embeddings in the \(L\)-localized neighborhood of each vertex into a contextualized representation. + +\Textcite{gcn_spectral_semi} evaluate their model on the same \textsc{nell} dataset used by planetoid with the same 0.1\% labeling rate. +They train their model by maximizing the log-likelihood of labeled examples. +They obtain an accuracy of 66.0\%, which is an increase of 4.9 points over planetoid. + +\subsection{Spatial \textsc{gcn}} +\label{sec:graph:spatial gcn} +\begin{marginfigure} + \centering + \input{mainmatter/graph/graph convolution parallel.tex} + \scaption[Parallel between two-dimensional \textsc{cnn} data and \textsc{gcn} data.]{ + Parallel between two-dimensional \textsc{cnn} data and \textsc{gcn} data. + \label{fig:graph:graph convolution parallel} + } +\end{marginfigure} +\sidecite{graphsage}% XXX Insert the citation between the figure and the 1-dim CNN sidenote + +Spatial methods directly draw from the comparison with \textsc{cnn} in the spatial domain. +As shown by Figure~\ref{fig:graph:graph convolution parallel}, the lattice on which a 2-dimensional% +\sidenote{ + Even though the same comparison could be made with 1-dimensional \textsc{cnn} as introduced in Section~\ref{sec:context:cnn}, the similarity is less visually striking. + Especially when considering a filter of width 3, in which case the equivalent graph is a simple path graph: \input{mainmatter/graph/path graph.tex}\kern-1mm. +} +\textsc{cnn} is applied can be seen as a graph with a highly regular connectivity pattern. +In this section, we introduce spatial \textsc{gcn} by following the Graph\textsc{sage} model \parencite{graphsage}. + +When computing the activation of a specific node with a \textsc{cnn}, the filter is centered on this node, and each neighbor is multiplied with a corresponding filter element. +The products are then aggregated by summation. +Spatial \textsc{gcn}s purpose to mimic this process. +The main obstacle to generalizing this spatial view of convolutions to graphs is the irregularity of neighborhoods.% +\sidenote{ + Interestingly enough, this is also a problem with standard \textsc{cnn}s when dealing with values at the edges of the matrix. +} +In a graph, nodes have different numbers of neighbors; a fixed-size filter cannot be used. +Graph\textsc{sage} proposes several aggregators to replace this product--sum process: +\begin{description} + \item[Mean aggregator] The neighbors are averaged and then multiplied by a single filter \(\mtrx{W}^{(l)}\): + \begin{equation*} + \operatorname{aggregate}_\text{mean}^{(\ell+1)}(v) = \sigmoid\left(\mtrx{W}^{(\ell)} \frac{1}{\gfdegree(v)+1} \sum_{u\in\gfneighbors(v)\cup\{v\}} \hspace{-3mm} \vctr{h}_u^{(\ell)}\right). + \end{equation*} + A spatial \textsc{gcn} using this aggregator is close to the \textsc{gcn} of \textcite{gcn_spectral_semi} with \(K=1\) presented in Section~\ref{sec:graph:spectral gcn}. + \item[L\textmd{\textsc{stm}} aggregator] + An \textsc{lstm} (Section~\ref{sec:context:lstm}) is run through all neighbors with the final hidden state used as the output of the layer. + \begin{equation*} + \operatorname{aggregate}_\textsc{lstm}^{(\ell+1)}(v) = \operatorname{\textsc{lstm}}^{(\ell)}\left(\left(\vctr{h}_u^{(\ell)}\right)_{u\in \gfneighbors(v)}\right)_{\gfdegree(v)}. + \end{equation*} + Since \textsc{lstm}s are not permutation-invariant, the order in which the neighbors are presented is important. 
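As a concrete illustration of this propagation rule, the following NumPy sketch computes one such layer on a toy graph; the dimensions, the random initialization, and the function name are ours and purely illustrative, not our experimental settings.
\begin{verbatim}
import numpy as np

def gcn_layer(M, H, Theta):
    """One propagation step: ReLU((D+I)^{-1/2} (M+I) (D+I)^{-1/2} H Theta)."""
    n = M.shape[0]
    M_tilde = M + np.eye(n)                       # add self-loops
    d_tilde = M_tilde.sum(axis=1)                 # degrees including self-loops
    D_inv_sqrt = np.diag(1.0 / np.sqrt(d_tilde))  # (D+I)^{-1/2}
    A_hat = D_inv_sqrt @ M_tilde @ D_inv_sqrt     # normalized adjacency
    return np.maximum(A_hat @ H @ Theta, 0.0)     # ReLU non-linearity

rng = np.random.default_rng(0)
M = np.array([[0, 1, 0],                          # path graph on three vertices
              [1, 0, 1],
              [0, 1, 0]], dtype=float)
H = rng.normal(size=(3, 4))                       # initial features X with d = 4
Theta = rng.normal(size=(4, 4))                   # layer parameters
print(gcn_layer(M, H, Theta).shape)               # (3, 4)
\end{verbatim}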
+ \item[Pooling aggregator] + A linear layer is applied to all neighbors which are then pooled through a \(\max\) operation. + \begin{equation*} + \operatorname{aggregate}_\text{max}^{(\ell+1)}(v) = \max\left(\left\{\,\mtrx{W}^{(\ell)}\vctr{h}_u^{(\ell)} + \vctr{b}^{(\ell)} \middlerel{|} u\in\gfneighbors(v)\,\right\}\right). + \end{equation*} + Note that the maximum is applied feature-wise. +\end{description} +Using one of these aggregator, a Graph\textsc{sage} layer performs the three following operations for all vertices \(v\in V\): +\begin{marginparagraph} + As usual the matrices \(\mtrx{W}^{(l)}_i\) are trainable model parameters. +\end{marginparagraph} +\begin{align*} + \vctr{a}_v^{(\ell+1)} & \gets \operatorname{aggregate}^{(\ell+1)}(v) \\ + \vctr{h}_v^{(\ell+1)} & \gets \sigmoid\left(\mtrx{W}_1^{(\ell)} \vctr{h}_v^{(\ell)} + \mtrx{W}_2^{(\ell)} \vctr{a}_v^{(\ell+1)}\right) \\ + \vctr{h}_v^{(\ell+1)} & \gets \vctr{h}_v^{(\ell+1)} \divslash \big\|\vctr{h}_v^{(\ell+1)}\big\|_2 . +\end{align*} +However, this approach still performs poorly when the graph is irregular.% +\sidenote{ + In graph theory, a \(k\)-regular graph is a graph where all vertices have degree \(k\). + By irregular, we mean that the distribution of vertices degrees has high variance; we don't use the term in its formal ``highly irregular'' meaning. + This is indeed the case in scale-free graphs, as their variance is infinite when \(\gamma<3\). +} +In particular, high-degree vertices---such as ``United States'' in \textsc{t-re}x as described in Section~\ref{sec:graph:analysis}---incur significant memory usage. +To solve this, Graph\textsc{sage} proposes to sample a fixed-size neighborhood for each vertex during training. +Their representation is therefore computed from a small number of neighbors. +Since \(L\) layers of Graph\textsc{sage} produce \(L\)-localized representations, vertices need to be sampled at most at distance \(L\) of the vertex for which we want to generate a representation. +\Textcite{graphsage} propose an unsupervised negative sampling loss to train their \textsc{gcn} such that adjacent vertices have similar representations: +\begin{equation} + \loss{gs} = \sum_{(u, v)\in E} + \log \sigmoid\left(\vctr{z}_v\transpose \vctr{z}_u\right) + - \gamma \expectation_{v'\sim\uniformDistribution(V)} + \left[ \log \sigmoid\left(-\vctr{z}_{v'}\transpose \vctr{z}_u\right) \right] + \label{eq:graph:graphsage loss} +\end{equation} +where \(\mtrx{Z} = \mtrx{H}^{(L)}\) is the activation of the last layer and \(\gamma\) is the number of negative samples. + +One of the advantages of Graph\textsc{sage} compared to the approach of \textcite{gcn_spectral_semi} is that it is inductive, whereas the spectral \textsc{gcn} presented in Section~\ref{sec:graph:spectral gcn} is transductive. +Indeed, in the spectral approach, the filter is trained for a specific eigenvectors matrix \(\mtrx{U}\) which depends on the graph. +If the graph changes, everything must be re-trained from scratch. +In comparison, the parameters learned by Graph\textsc{sage} can be reused for a different graph without any problem. + +A limitation of Graph\textsc{sage} is that the contribution of each neighbor to the representation of a vertex \(v\) is either fixed at \(1\divslash (\gfdegree(v)+1)\) (with the mean aggregator) or not modeled explicitly. +The same can be observed with the model of \textcite{gcn_spectral_semi}, where the representation of each neighbor \(u\) is nonparametrically weighted by \(1\divslash\sqrt{\gfdegree(v)+\gfdegree(u)}\). 
+
+In contrast, the graph attention network (\textsc{gat}, \citex{graph_attention_network}) proposes to parametrize this weight with a model similar to the attention mechanism presented in Section~\ref{sec:context:attention}.
+The output is built using an attention-like%
+\sidenote{
+  \Textcite{graph_attention_network} actually propose to use multi-head attention (Section~\ref{sec:context:transformer attention}).
+  We describe their model with a single attention head for ease of notation.
+}
+convex combination of transformed neighbors' representations:
+\begin{equation*}
+  \vctr{h}^{(\ell+1)}_v \gets \sigmoid\left(\sum_{u\in\gfneighbors(v)\cup\{v\}} \hspace{-3mm} \alpha^{(\ell)}_{vu} \mtrx{W}^{(\ell)} \vctr{h}_u^{(\ell)}\right),
+\end{equation*}
+where \(\alpha^{(\ell)}_{vu}\), the attention given by \(v\) to neighbor \(u\) at layer \(\ell\), is computed using a softmax:
+\begin{marginparagraph}
+  LeakyReLU \parencite{leakyrelu} is a variant of ReLU where the negative domain is linear with a small slope instead of being mapped to zero:
+  \begin{equation*}
+    \operatorname{LeakyReLU}(x) =
+      \begin{cases}
+        x & \text{if } x>0, \\
+        0.01x & \text{otherwise}. \\
+      \end{cases}
+  \end{equation*}
+\end{marginparagraph}
+\begin{equation*}
+  \alpha^{(\ell)}_{vu} \propto \exp \operatorname{LeakyReLU}\left(\vctr{g}^{(\ell)\transposesym}
+    \begin{bmatrix}
+      \mtrx{W}^{(\ell)}_\textsc{gat} \vctr{h}_v^{(\ell)} \\
+      \mtrx{W}^{(\ell)}_\textsc{gat} \vctr{h}_u^{(\ell)}
+    \end{bmatrix} \right).
+\end{equation*}
+As usual, the matrices \(\mtrx{W}\) are parameters, as well as the vector \(\vctr{g}\) which is used to combine the representations of the two vertices into a scalar weight.
+
+While \textsc{gat} and Graph\textsc{sage} can be trained in an unsupervised fashion following Equation~\ref{eq:graph:graphsage loss}, they can also be used as building blocks for larger models, similarly to how we use \textsc{cnn} in Chapter~\ref{chap:fitb}.
+This flexibility, coupled with a simpler theoretical background and an easier implementation, has made spatial methods ubiquitous in graph-based approaches in the last few years.
+
+\subsection{\textsc{gcn} on Relation Graphs}
+\label{sec:graph:r-gcn}
+All the work introduced in the above sections is about simple undirected graphs \(G=(V, E)\).
+In contrast, in Section~\ref{sec:graph:encoding}, we encoded the relation extraction problem on attributed multigraphs \(G=(\entitySet, \arcSet, \gfendpoints, \gfrelation)\).
+Some works propose to extend \textsc{gcn} to the case of multigraphs, especially when dealing with knowledge bases.%
+\sidenote{
+  In this case, the multigraph is simply labeled since the set of relations is finite.
+  In contrast, in the relation extraction problem, the multigraph is attributed.
+  The arcs are associated with a sentence from an infinite set of possible sentences.
+}
+This is the case of \textsc{r-gcn} \parencitex{rgcn}, a graph convolutional network for relational data.
+The input graph is not labeled with sentences (\(\gfsentence\)) since \textsc{r-gcn} intends to model a knowledge base \(\kbSet\).
+This means that while \(G\) is a multigraph, the subgraphs \(G_\gfsr\) are simple graphs for all relations \(r\in\relationSet\).
+\textsc{r-gcn}s exploit this by using a separate \textsc{gcn} filter for each relation.
+An \textsc{r-gcn} layer can be defined as:
+\begin{marginparagraph}
+  Note that only the outgoing neighbors \(\gfneighborsrr\) are taken since for each incoming neighbor labeled \(r\), there is an outgoing one labeled \(\breve{r}\).
+\end{marginparagraph}
+\begin{equation}
+  \vctr{h}^{(\ell+1)}_v \gets \sigmoid\left(\mtrx{W}_0^{(\ell)} \vctr{h}_v^{(\ell)} + \sum_{r\in\relationSet}\sum_{u\in\gfneighborsrr(v)} \mtrx{W}_r^{(\ell)} \vctr{h}_u^{(\ell)} \right),
+  \label{eq:graph:r-gcn layer}
+\end{equation}
+\begin{marginparagraph}
+  Paralleling the notations used for \textsc{cnn}s in Section~\ref{sec:context:cnn}, we use \(d\) to denote the dimension of embeddings at layer \(\ell\) and \(d'\) for the dimension at layer \(\ell+1\).
+  More often than not, the same dimension is used at all layers, i.e.~\(d'=d\).
+  In the following, we use \(d\) as a generic notation for embedding and latent dimensions.
+\end{marginparagraph}
+where \(\mtrx{W}_0\in\symbb{R}^{d'\times d}\) is used for the (implicit) self-loop, while \(|\relationSet|\) different filters \(\mtrx{W}_r\in\symbb{R}^{d'\times d}\) are used for capturing the arcs.
+With highly multi-relational data, the number of parameters grows rapidly since a full matrix needs to be estimated for every relation, even rare ones.
+To address this issue, \textcite{rgcn} propose to either constrain the matrices \(\mtrx{W}_r\) to be block-diagonal, or to decompose them over a small basis \(\tnsr{Z}^{(\ell)}\in\symbb{R}^{B\times d'\times d}\):
+\begin{equation*}
+  \mtrx{W}^{(\ell)}_r = \sum_{b=1}^{B} a_{rb}^{(\ell)} \mtrx{Z}_b^{(\ell)},
+\end{equation*}
+where \(B\) is the size of the basis and the \(\vctr{a}_r^{(\ell)}\) are the parametric weights for the matrices \(\mtrx{W}^{(\ell)}_r\).
+
+\Textcite{rgcn} evaluate their model on two tasks.
+First, they evaluate on an entity classification task using a simple softmax layer with a cross-entropy loss on top of the vertex representation at the last layer (\(\mtrx{H}^{(L)}\) as defined by Equation~\ref{eq:graph:r-gcn layer}).
+Second, more closely related to relation extraction, they evaluate on a relation prediction task.
+\begin{marginparagraph}
+  This is similar to the evaluation of TransE reported in Section~\ref{sec:context:transe}, except that instead of predicting a missing entity in a tuple \((e_1, r, e_2)\in\kbSet\), the model must predict the missing relation, assuming \hypothesis{1-adjacency} in the process.
+\end{marginparagraph}
+Given a pair of entities \((e_1, e_2)\in\entitySet^2\), the model must predict the relation \(r\in\relationSet\) between them, such that \((e_1, r, e_2)\in\kbSet\).
+To this end, \textcite{rgcn} employ the DistMult model, which can be seen as a \textsc{rescal} model (Section~\ref{sec:context:rescal}) where the interaction matrices are diagonal.
+The energy of a fact is defined as:
+\begin{equation*}
+  \psi_\text{DistMult}(e_1, r, e_2) = \vctr{u}_{e_1}\transpose \mtrx{C}_r \vctr{u}_{e_2},
+\end{equation*}
+where \(\vctr{u}_e\) is the embedding of the entity at the last layer of the \textsc{r-gcn}: \(\vctr{u}_e=\vctr{h}^{(L)}_e\) and \(\mtrx{C}_r\in\diagonal(\symbb{R}^d)\) is a diagonal matrix parameter.
+The probability associated with a fact by DistMult is proportional to the exponential of the energy function \(\psi_\text{DistMult}\).
+Therefore, a missing relation between \(e_1, e_2\in\entitySet\) can be predicted by taking the softmax over relations \(r\in\relationSet\) of \(\psi_\text{DistMult}(e_1, r, e_2)\).
+\textsc{r-gcn}s are trained using negative sampling (Section~\ref{sec:context:negative sampling}) on the entity classification and relation prediction tasks.
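+
+As a rough illustration of Equation~\ref{eq:graph:r-gcn layer} and of the basis decomposition, the following NumPy sketch builds the relation-specific filters from a shared basis and applies one such layer; names and shapes are our own, and practical concerns (sparsity, normalization, batching) are ignored.
+\begin{verbatim}
+import numpy as np
+
+def sigmoid(x):
+    return 1.0 / (1.0 + np.exp(-x))
+
+def rgcn_layer(h, arcs, w0, basis, coeffs):
+    """One simplified R-GCN layer with basis decomposition.
+
+    h: (n, d) vertex representations.
+    arcs: list of (u, v, r) triples meaning u --r--> v.
+    w0: (d_out, d) self-loop filter.
+    basis: (B, d_out, d) shared basis Z.
+    coeffs: (R, B) per-relation weights a_r, so W_r = sum_b a_rb Z_b.
+    """
+    w_r = np.einsum("rb,bij->rij", coeffs, basis)  # (R, d_out, d)
+    out = h @ w0.T                                 # self-loop term W_0 h_v
+    messages = np.zeros_like(out)
+    for u, v, r in arcs:
+        # v is an outgoing neighbor of u for relation r,
+        # so u aggregates the message W_r h_v.
+        messages[u] += w_r[r] @ h[v]
+    return sigmoid(out + messages)
+
+# Toy graph: 3 vertices, 2 relations, a handful of arcs.
+rng = np.random.default_rng(0)
+n, d, n_rel, n_basis = 3, 4, 2, 2
+h0 = rng.normal(size=(n, d))
+arcs = [(0, 1, 0), (1, 2, 1), (2, 0, 0)]
+w0 = rng.normal(size=(d, d))
+basis = rng.normal(size=(n_basis, d, d))
+coeffs = rng.normal(size=(n_rel, n_basis))
+print(rgcn_layer(h0, arcs, w0, basis, coeffs).shape)  # (3, 4)
+\end{verbatim}
+In \textcite{rgcn}, the vertex representations produced by such layers are then scored with DistMult and trained with negative sampling, as described above.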
+This is similar to the training of TransE, where the main difference is that the entity embeddings are computed using \textsc{r-gcn} layers instead of being directly fetched from an entity embedding matrix.
+
+A limitation of \textsc{r-gcn}s is that they only rely on vertex representations.
+Even when the evaluation involves the classification of arcs (as is the case with relation prediction), this is only done by combining the representations of the endpoints (using DistMult).
+
+Several works build upon \textsc{r-gcn}.
+\textsc{gp-gnn} \parencitex{gp-gnn} applies a similar model to the supervised relation extraction task.
+In this case, the graph is attributed with sentences instead of relations; therefore, the weight matrices \(\mtrx{W}_r\) are generated from the sentences instead of using an index of all possible relations.
+They apply their model to Wikipedia distantly supervised by Wikidata.
+However, the classification is still made from the representation of the endpoints of arcs.
+Related work also appears in the \emph{heterogeneous graph} community \parencitex{heterogeneous_attention, heterogeneous_transformer}.
+Heterogeneous graphs are graphs with labels on both vertices and arcs.
+The model proposed by \textcite{heterogeneous_transformer} is similar to \textsc{r-gcn} with an attention mechanism more akin to the transformer's attention (Section~\ref{sec:context:transformer attention}) than classical attention (Section~\ref{sec:context:attention}).
+The canonical evaluation datasets of this community are citation graphs.
+Vertices are assigned labels such as ``people,'' ``article,'' and ``conference,'' while arcs are labeled with a small number of domain-specific relations: \textsl{author}, \textsl{published at}, \textsl{cite}, etc.
+The evaluation task typically corresponds to entity prediction.
+
+\subsection{Weisfeiler--Leman Isomorphism Test}
+\label{sec:graph:weisfeiler-leman}
+\begin{marginfigure}[-18mm]
+  \centering
+  \input{mainmatter/graph/isomorphism.tex}
+  \scaption[Example of isomorphic graphs.]{
+    Example of isomorphic graphs.
+    Each vertex \(i\) in the first graph corresponds to the \(i\)-th letter of the alphabet in the second graph.
+    Alternatively, these graphs have a nontrivial automorphism, for example, the one mapping vertex \(i\) to vertex \(9-i\).
+    \label{fig:graph:isomorphism}
+  }
+\end{marginfigure}
+
+In this section, we introduce the theoretical background of \textsc{gcn}s.
+This is of particular interest to us since this theoretical background is more closely related to unsupervised relation extraction than \textsc{gcn}s might appear to be at first glance.
+As stated in the introduction to the thesis, relations emerge from repetitions.
+In particular, we expect that two identical (sub-)graphs convey the same relations.
+However, testing whether two graphs are identical is a complex problem.
+Indeed, we have to match each of the \(n\) vertices of the first graph to one of the \(n\) possibilities in the second graph.
+Naively, we need to try all \(n!\) possibilities.
+This is known as the graph isomorphism problem.
+Two simple graphs \(G_1 = (V_1, E_1)\), \(G_2 = (V_2, E_2)\) are said to be isomorphic (\(G_1\simeq G_2\)) iff there exists a bijection \(f\colon V_1\to V_2\) such that \((u, v)\in E_1 \iff (f(u), f(v))\in E_2\).
+Figure~\ref{fig:graph:isomorphism} gives an example of two isomorphic graphs.
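+
+To give an idea of why the naive approach does not scale, the following Python sketch tests isomorphism by enumerating all \(n!\) candidate bijections; it is only meant to illustrate the combinatorial explosion, not to be used in practice.
+\begin{verbatim}
+from itertools import permutations
+
+def are_isomorphic(edges1, edges2, n):
+    """Naive isomorphism test for two simple graphs on vertices 0..n-1.
+
+    edges1, edges2: sets of frozensets {u, v} (undirected edges).
+    Tries all n! candidate bijections f and checks that
+    {u, v} is an edge of G1 iff {f(u), f(v)} is an edge of G2.
+    """
+    if len(edges1) != len(edges2):
+        return False
+    for f in permutations(range(n)):
+        mapped = {frozenset((f[u], f[v])) for u, v in edges1}
+        if mapped == edges2:
+            return True
+    return False
+
+# Two 4-cycles given with different vertex orderings are isomorphic.
+g1 = {frozenset(e) for e in [(0, 1), (1, 2), (2, 3), (3, 0)]}
+g2 = {frozenset(e) for e in [(0, 2), (2, 1), (1, 3), (3, 0)]}
+print(are_isomorphic(g1, g2, 4))  # True
+\end{verbatim}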
+ +The various \textsc{gcn} methods introduced thus far can be seen as generalizations of the Weisfeiler--Leman% +\sidenote[][-1cm]{ + Often spelled Weisfeiler--Lehman, \textcite{leman_spelling} indicates that Andreĭ Leman preferred to transliterate his name without an ``h.'' +} +isomorphism test \parencitex{weisfeiler-leman}, which tests whether two graphs are isomorphic. +The \(k\)-dimensional Weisfeiler--Leman isomorphism test (\(k\)-dim \textsc{wl}) is a polynomial-time algorithm assigning a color to each \(k\)-tuple of vertices% +\sidenote{An ordered sequence of \(k\) vertices, that is an element of \(V^k\), not necessarily connected.} +such that two isomorphic graphs have the same coloring. +With a bit of work, the general \(k\)-dim \textsc{wl} algorithm can be implemented in \(O(k^2 n^{k+1}\log n)\) \parencite{weisfeiler-leman_complexity}. +However, there exist pairs of graphs that are not isomorphic, yet are assigned with the same coloring by the Weisfeiler--Leman test \parencitex{weisfeiler-leman_fail}[-2.5mm]. +At the time of writing, the precise membership of the graph isomorphism problem with respect to the polynomial complexity classes is still conjectural. +No polynomial-time algorithm nor reduction from \textsc{np}-complete problems are known. +This makes graph isomorphism one of the prime candidates for the \textsc{np}-intermediate complexity class.% +\sidenote{ + The class of \textsc{np} problems neither in \textsc{p} nor \textsc{np}-complete. + It is guaranteed to be non-empty if \(\textsc{p}\neq\textsc{np}\). + Clues for the \textsc{np}-intermediateness of the graph isomorphism problem can be found in the fact that the counting problem is in \textsc{np} \parencite{gi_counting} and more recently, from the fact that a quasi-polynomial algorithm exists \parencite{gi_quasipoly}. +} + +\begin{algorithm} + \centering + \begin{minipage}[b]{8cm} + \input{mainmatter/graph/Weisfeiler-Leman.tex} + \end{minipage} + \scaption[The Weisfeiler--Leman isomorphism test.]{ + The Weisfeiler--Leman isomorphism test. + The double braces \(\lMultiBrace\ \rMultiBrace\) denote a multiset. + Since \(\symfrak{I}_\ell\) is indexed with the previous coloring \(\chi_{\ell-1}(\vctr{x})\) of the vertices---alongside \(c_\ell(x)\)---the number of color classes is strictly increasing until the last iteration when it remains constant. + Since the last coloring is stable, we refer to it as \(\chi_\infty\). + \label{alg:graph:weisfeiler-leman} + } +\end{algorithm} + +The general \(k\)-dim \textsc{wl} test is detailed in Algorithm~\ref{alg:graph:weisfeiler-leman}. +It is a refinement algorithm, which means that at a given iteration, color classes can be split, but two \(k\)-tuples with different colors at iteration \(\ell\) can't have the same color at iteration \(\ell'>\ell\). +Initially, all \(k\)-tuples \(x\) are assigned a color according to their isomorphism class \(\operatorname{iso}(x)\). +We define the isomorphism class through an equivalence relation. +For two \(k\)-tuples \(\vctr{x}, \vctr{y}\in V^k\), \(\operatorname{iso}(x)=\operatorname{iso}(y)\) iff:% +\sidenote{ + To avoid having to align two colorings, the \textsc{wl} algorithm is usually run on the disjoint union of the two graphs. + So, strictly speaking, it tests for automorphism (isomorphic endomorphism). + Therefore we can assume \(\vctr{x}\) and \(\vctr{y}\) are from the same vertex set \(V\). 
+} +\begin{itemize} + \item \(\forall i, j \in [1, \dotsc, k]: x_i=x_j \iff y_i=y_j\) + \item \(\forall i, j \in [1, \dotsc, k]: (x_i, x_j)\in E \iff (y_i, y_j)\in E\) +\end{itemize} +Intuitively, this checks whether \(x_i \mapsto y_i\) is an isomorphism for the subgraphs built from the \(k\) vertices \(\vctr{x}\) and \(\vctr{y}\). +This is not the same as the graph isomorphism problem since here, the candidate isomorphism is given, we don't have to test the \(k!\) possibilities. + +The coloring of \(\vctr{x}\in V^k\) is refined at each step by juxtaposing it with the coloring of its neighbors \(\gfneighbors^k(\vctr{x})\). +We need to reindex the new colors at each step since the length of the color strings would grow exponentially otherwise. +The set of neighbors% +\sidenote{ + Note that the kind of neighborhood defined by \(\gfneighbors^k\) completely disregards the edges in the graph. + For this reason, it is sometimes called the \emph{global neighborhood}. +} +of a \(k\)-tuple for \(k\geq 2\) is defined as: +\begin{equation*} + \gfneighbors^k(\vctr{x}) = \left\{\, \vctr{y}\in V^k \middlerel{|} \exists i\in[1,\dotsc,k]: \forall j\in[1,\dotsc,k]: j\neq i \implies x_j=y_j \,\right\}. +\end{equation*} +In other words, the \(k\)-tuples \(\vctr{y}\) neighboring \(\vctr{x}\) are those differing by at most one vertex with \(\vctr{x}\). + +The 1-dim \textsc{wl} test is also called the \emph{color refinement} algorithm. +In this case, \(\gfneighbors^1(x)\) is simply \(\gfneighbors(x)\) the set of neighbors of \(x\). +The isomorphism class of a single vertex is always the same, so \(\chi_0\) assigns the same color to all vertices. +The first iteration of the algorithm groups vertices according to their degree (the multiplicity of the sole element in the multiset \(c_1(x)\)). +The second iteration \(\chi_2\) then colors each vertex according to its degree \(\chi_1\) and the degree of its neighbors \(c_2\). +And so on and so forth until \(\chi\) does not change anymore. + +The \textsc{gcn} introduced in the previous sections can be seen as variants of the 1-dim \textsc{wl} algorithm where the index \(\symfrak{I}_\ell\) is replaced with a neural network such as \(\operatorname{aggregate}^{(\ell)}_\text{mean}\) given in Section~\ref{sec:graph:spatial gcn}. +In this case \(\chi_\ell\) corresponds to \(\mtrx{H}^{(\ell)}\) the activations at layer \(\ell\). 
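+
+As an illustration of this connection, here is a small Python sketch of the 1-dim \textsc{wl} test (color refinement) on simple undirected graphs; the reindexing of the signatures plays the role of the index \(\symfrak{I}_\ell\), and replacing it with a learned aggregation essentially gives a message-passing \textsc{gcn} layer. The helper names are ours.
+\begin{verbatim}
+def color_refinement(neighbors, max_iter=100):
+    """1-dimensional Weisfeiler-Leman test (color refinement).
+
+    neighbors: list of neighbor index lists, one per vertex.
+    Returns the stable coloring chi_infinity as a list of integers.
+    """
+    n = len(neighbors)
+    colors = [0] * n  # chi_0: all vertices start with the same color
+    for _ in range(max_iter):
+        # c_l(v): the vertex's own color and the multiset of its
+        # neighbors' colors at the previous iteration.
+        signatures = [
+            (colors[v], tuple(sorted(colors[u] for u in neighbors[v])))
+            for v in range(n)
+        ]
+        # Reindex the signatures into fresh integer colors (the index I_l).
+        index = {sig: i for i, sig in enumerate(sorted(set(signatures)))}
+        new_colors = [index[sig] for sig in signatures]
+        if len(set(new_colors)) == len(set(colors)):
+            return new_colors  # the number of color classes is stable
+        colors = new_colors
+    return colors
+
+# On the path graph 0-1-2-3, the two endpoints end up with one color
+# and the two middle vertices with another.
+print(color_refinement([[1], [0, 2], [1, 3], [2]]))  # [0, 1, 1, 0]
+\end{verbatim}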
diff --git a/mainmatter/graph/samples example.tex b/mainmatter/graph/samples example.tex @@ -0,0 +1,38 @@ +\begin{tikzpicture}[ + gnode/.style={ + draw, + ellipse + }, + patext/.style 2 args={ + decoration={ + text align=center, + text along path, + text={|\marginsize||+#1|#2} + }, + decorate + }] + + \node[gnode] (mi5) at ( 7, 0) {\textsc{mi5}}; + \node[gnode] (thames) at ( 0, 3) {Thames House}; + \node[gnode] (smersh) at (-7, 0) {\textsc{smersh}}; + \node[gnode] (counter) at ( 0, -3) {counterintelligence}; + + \def\shiftA#1{\raisebox{-2.5ex}} + \def\shiftB#1{\raisebox{1ex}} + + \draw[thick,latex-] (thames.east) to [out=0,in=90] (mi5.north); + \draw[patext={\shiftB}{headquarters location}] (thames.east) to [out=0,in=90] (mi5.north); + \draw[patext={\shiftA}{The exterior and interior of Freemasons' Hall…}] (thames.east) to [out=0,in=90] (mi5.north); + + \draw[thick,-latex] (thames.south) to [out=-90,in=180] (mi5.west); + \draw[patext={\shiftB}{occupant}] (thames.south) to [out=-90,in=180] (mi5.west); + \draw[patext={\shiftA}{The Freemasons' Hall in London served as the filming…}] (thames.south) to [out=-90,in=180] (mi5.west); + + \draw[Dark2-B, thick,-latex] (smersh.south) to [out=-90,in=180] (counter.west); + \draw[patext={\shiftB}{field of work}] (smersh.south) to [out=-90,in=180] (counter.west); + \draw[patext={\shiftA}{In its counter-espionage and counter-…}] (smersh.south) to [out=-90,in=180] (counter.west); + + \draw[thick,latex-] (counter.east) to [out=0,in=-90] (mi5.south); + \draw[patext={\shiftB}{field of work}] (counter.east) to [out=0,in=-90] (mi5.south); + \draw[patext={\shiftA}{Golitsyn's claims about Wilson were believed…}] (counter.east) to [out=0,in=-90] (mi5.south); +\end{tikzpicture} diff --git a/mainmatter/relation extraction/aggregate.tex b/mainmatter/relation extraction/aggregate.tex @@ -0,0 +1,277 @@ +\section{Supervised Aggregate Extraction Models} +\label{sec:relation extraction:aggregate} +All the approaches introduced thus far are sentential. +They map each sample to a relation individually, without modeling the interactions between samples. +In contrast, this section focuses on aggregate approaches (Equation~\ref{eq:relation extraction:aggregate definition}). +Aggregate approaches explicitly model the connections between samples. +The most common aggregate method is to ensure the consistency of relations predicted for a given entity pair \(\vctr{e}\in\entitySet^2\) by processing together all sentences \(s\in\sentenceSet\) mentioning \(\vctr{e}\). +To this end, we define \(\dataSet^\vctr{e}\) to be the dataset \(\dataSet\) grouped by entity pairs. +Thus, instead of containing a sample \(x=(s, \vctr{e})\), the dataset \(\dataSet^\vctr{e}\) contains bag of mentions \(\vctr{x}=\{(s, \vctr{e}), (s', \vctr{e}), \dotsc\}\) of the same entity pair \(\vctr{e}\). +Most aggregate methods are built upon sentential approaches and provide a sentential assignment. +Therefore, more often than not, each sample is still mapped to a relation. +Therefore, the evaluations of aggregate methods follow the evaluations of sentential approaches introduced in Section~\ref{sec:relation extraction:supervised evaluation}. + +\subsection{Label Propagation} +\label{sec:relation extraction:label propagation} +To deal with the shortage of manually labeled data, one approach is to use labels weakly correlated with the samples as in distant supervision (Section~\ref{sec:relation extraction:distant supervision}). 
+Another approach is to label a small subset of the dataset but leave most samples unlabeled. +This is the semi-supervised approach. +The bootstrapped models (Section~\ref{sec:relation extraction:bootstrap}) can also be seen as semi-supervised approaches: a small number of labeled samples are given to the model, which then crawls the web to obtain new unsupervised samples. +The evaluation of semi-supervised models follows the one of supervised models described in Section~\ref{sec:relation extraction:supervised evaluation}. +The difference between the two lies in the fact that unsupervised samples can be used to gain a better estimate of the input distribution in the semi-supervised settings, while fully-supervised models cannot make use of unsupervised samples. + +Apart from bootstrapped models, one of the first semi-supervised relation extraction systems was proposed by \textcitex{label_propagation_re}. +They build their model on top of hand-engineered features (Section~\ref{sec:relation extraction:hand-designed features}) compared using a similarity function. +This is somewhat similar to kernel approaches (section~\ref{sec:relation extraction:kernel}), except that this function does not need to be positive semidefinite. +Given all samples in feature space, the labels from the supervised samples are propagated to the neighboring unlabeled samples using the label propagation algorithm \parencite{label_propagation} listed as Algorithm~\ref{alg:relation extraction:label propagation}. +This propagation takes the form of a convex combination of other samples' labels weighted by the similarity function. +Let's call \(\operatorname{sim}\) this unlabeled sample similarity function: +\begin{equation*} + \operatorname{sim}\colon (\sentenceSet\times\entitySet^2)\times(\sentenceSet\times\entitySet^2)\to\symbb{R}. +\end{equation*} +The label propagation algorithm builds a pairwise similarity matrix between labeled and unlabeled samples which have been column normalized then row normalized: +\begin{marginalgorithm} + \input{mainmatter/relation extraction/label propagation.tex} + \scaption[The label propagation algorithm.]{ + The label propagation algorithm. + The notation \(\delta_{a,b}\) is a Kronecker delta, equals to \(1\) if \(a=b\) and to \(0\) otherwise. + The two loops assigning to \(y_{ij}\) are simply enforcing that the relation assigned to the labeled samples do not deviate from their gold value. + \label{alg:relation extraction:label propagation} + } +\end{marginalgorithm} +\begin{equation} + t_{ij} \propto \frac{\exp\big(\operatorname{sim}(x_i, x_j)\big)}{\displaystyle \sum_{x_k\in \dataSet \cup \dataSet_\relationSet} \exp\big(\operatorname{sim}(x_k, x_j)\big)} \quad \text{for } i,j\in\{1,\dotsc,|\dataSet|+|\dataSet_\relationSet|\} + \label{eq:relation extraction:label propagation transition} +\end{equation} +The relation assigned to each unlabeled sample is then recomputed by aggregating the labels---whether these labels come from \(\dataSet_\relationSet\) or were computed at a previous iteration---of all other samples weighted by \(\mtrx{T}\). +Note that labels assigned to samples coming from \(\dataSet_\relationSet\) are not altered. +This operation is repeated until the label assignment stabilizes. +This label propagation algorithm has been shown to converge to a unique solution \parencite{label_propagation}. + +\Textcite{label_propagation_re} tried two similarity functions: the cosine and the Jensen--Shannon of the feature vectors. 
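+
+A minimal NumPy sketch of this propagation, with a generic similarity function in place of the hand-engineered features of \textcite{label_propagation_re} and a fixed number of iterations instead of a convergence check, could look as follows.
+\begin{verbatim}
+import numpy as np
+
+def label_propagation(sim, labels, n_labeled, n_classes, n_iter=100):
+    """Simplified label propagation over a similarity matrix.
+
+    sim: (n, n) similarity matrix, labeled samples first.
+    labels: (n_labeled,) gold classes of the labeled samples.
+    Returns an (n, n_classes) soft assignment for every sample.
+    """
+    n = sim.shape[0]
+    t = np.exp(sim)
+    t /= t.sum(axis=0, keepdims=True)    # column normalization
+    t /= t.sum(axis=1, keepdims=True)    # then row normalization
+    y = np.full((n, n_classes), 1.0 / n_classes)
+    y[:n_labeled] = np.eye(n_classes)[labels]      # clamp gold labels
+    for _ in range(n_iter):
+        y = t @ y                                  # propagate through T
+        y /= y.sum(axis=1, keepdims=True)          # renormalize rows
+        y[:n_labeled] = np.eye(n_classes)[labels]  # re-clamp gold labels
+    return y
+
+# Toy example: 2 labeled and 3 unlabeled samples, 2 classes, with a
+# similarity given by the negative Euclidean distance between features.
+rng = np.random.default_rng(0)
+points = rng.normal(size=(5, 2))
+sim = -np.linalg.norm(points[:, None] - points[None, :], axis=-1)
+print(label_propagation(sim, np.array([0, 1]), 2, 2).round(2))
+\end{verbatim}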
+They evaluated their approach on the \textsc{ace}~2003 dataset (Section~\ref{sec:datasets:ace}) using different fractions of the labels to show that while their model was roughly at the same performance level as others when using the whole dataset, it decisively outperformed other methods when using a small number of labels.
+
+\subsection{Multi-instance Multi-label}
+\label{sec:relation extraction:miml}
+Following the popularization of distant supervision by \textcite{distant}, training datasets gained in volume but lost in quality (see Section~\ref{sec:relation extraction:distant supervision}).
+In order to create models more resilient to the large number of false positives in distantly-supervised datasets, multi-instance approaches~\parencite{multi-instance} started to gain traction.
+
+In the article of \textcite{distant}, all mentions of the same entity pair are viewed as a single sample to make a prediction.
+Their model is a simple logistic classifier on top of hand-engineered features, which could only predict a single relation label per entity pair.
+However, when aggregating the features of all mentions and supervising with a single relation, \textcite{distant} backpropagate to all features, i.e.~the parameters used by all mentions are modified.
+This assumes that all mentions should convey the relation.
+To avoid this assumption, the more sophisticated multi-instance assumption is used:
+\begin{assumption}[multiinstance]{multi-instance}
+  All facts \((\vctr{e}, r)\in\kbSet\) are conveyed by at least one sentence of the unlabeled dataset \(\dataSet\).
+
+  \smallskip
+  \noindent
+  \(\forall (e_1, e_2, r)\in\kbSet : \exists (s, e_1, e_2)\in\dataSet : (s, e_1, e_2) \text{ conveys } \tripletHolds{e_1}{r}{e_2}\)
+\end{assumption}
+
+MultiR \parencitex{multir} follows such a multi-instance setup but also models multiple relations and thus does not assume \hypothesis{1-adjacency}, unlike all the models introduced thus far.
+Figure~\ref{fig:relation extraction:miml setup} illustrates this setup, which is dubbed \textsc{miml} (multi-instance multi-label) following the subsequent work of \textcite{miml}.
+
+\begin{marginfigure}
+  \centering
+  \input{mainmatter/relation extraction/miml setup.tex}
+  \scaption[Multi-instance multi-label (\textsc{miml}) setup.]{
+    Multi-instance (\(n>1\)) multi-label (\(m>1\)) setup.
+    Each entity pair appears in several instances and the two entities are linked by several relations.
+    \label{fig:relation extraction:miml setup}
+  }
+\end{marginfigure}
+
+MultiR uses a latent variable \(z\) to capture the sentential extraction.
+That is, for each sentence \(x_i\in\dataSet_\relationSet\), the latent variable \(\rndm{z}_i\in\relationSet\) captures the relation conveyed by \(x_i\).
+Furthermore, for a given entity pair \(\vctr{e}\in\entitySet^2\), for all \(r\in\relationSet\), a binary classifier \(y_r\) is used to predict whether this pair is linked by \(r\).
+In this fashion, multiple relations can be predicted for the same entity pair.
+The model can be summarized by the plate diagram of Figure~\ref{fig:relation extraction:multir plate}.
+\begin{marginfigure}
+  \centering
+  \input{mainmatter/relation extraction/multir plate.tex}
+  \scaption[MultiR plate diagram.]{
+    MultiR plate diagram.
+    The symbol \tikz{\node[pdiag factor]{};} denotes factor nodes.
+    \label{fig:relation extraction:multir plate}
+  }
+\end{marginfigure}
+Let's define \(\dataSet_\relationSet^\vctr{e}\) to be the dataset \(\dataSet_\relationSet\) where samples are grouped by entity pairs.
+Since multiple relations can link the same entity pair, we will use \(\vctr{y}\in \{0, 1\}^{\relationSet}\) to refer to the binary vector indexing the conveyed relations.
+Formally, MultiR defines the probability of the sentential (\(\vctr{z}\)) and aggregate (\(\vctr{y}\)) assignments for a mention bag (\(\vctr{x}\)) as follows:
+\begin{equation}
+  P(\vctr{y}, \vctr{z}\mid \vctr{x}; \vctr{\theta}) \propto \prod_{r\in\relationSet} \vctr{\phi}^\text{join}(y_r, \vctr{z}) \prod_{x_i\in\vctr{x}} \vctr{\phi}^\text{extract}(z_i, x_i; \vctr{\theta})
+  \label{eq:relation extraction:multir}
+\end{equation}
+where \(\vctr{\phi}^\text{join}\) simply aggregates the predictions for all mentions:
+\begin{equation*}
+  \vctr{\phi}^\text{join}(y_r, \vctr{z}) =
+    \begin{cases}
+      1 & \text{if \(y_r=1 \land \exists i : z_i=r\)} \\
+      0 & \text{otherwise}
+    \end{cases}
+\end{equation*}
+and \(\vctr{\phi}^\text{extract}\) is a weighted sum of several hand-designed features:
+\begin{equation*}
+  \vctr{\phi}^\text{extract}(z_i, x_i; \vctr{\theta}) = \exp\left(
+      \sum_{\text{feature \(j\)}} \theta_j \phi_j(z_i, x_i)
+    \right)
+\end{equation*}
+
+We now describe the training algorithm used by MultiR, which is listed as Algorithm~\ref{alg:relation extraction:multir}.
+Following the multi-instance setup, MultiR assumes that every fact \((e_1, r, e_2)\in\kbSet\) is conveyed by at least one mention \((s, e_1, e_2)\in\dataSet\).
+This can be seen in the first product of Equation~\ref{eq:relation extraction:multir}: if a single gold relation is not predicted for any sentence, the whole probability mass function drops to 0.
+This means that during inference, each relation \(r\) conveyed in the knowledge base must be covered by at least one sentential extraction \(z\).
+\begin{marginparagraph}
+  In particular, note that if an entity pair is linked by more relations than it has mentions in the text, the algorithm collapses since each mention conveys a single relation.
+\end{marginparagraph}
+Given all sentences \(\vctr{x}_i\subseteq\dataSet\) containing an entity pair \((e_1, e_2)\), when the model does not predict the actual set of relations \(\vctr{y}_i=\{\,r \mid (e_1, r, e_2)\in\kbSet\,\}\), the parameters \(\vctr{\theta}\) must be tuned such that every relation \(r\in\vctr{y}_i\) is conveyed by at least one sentence, as expressed by the line:
+\begin{algorithm}[t]
+  \centering
+  \begin{minipage}{7cm}
+    \input{mainmatter/relation extraction/multir.tex}
+  \end{minipage}
+  \scaption*[The MultiR training algorithm.]{
+    The MultiR training algorithm.
+    For each bag of mentions \(\vctr{x}_i\), the most likely sentential and aggregate predictions \((\vctr{y}', \vctr{z}')\) are made.
+    If the predicted relations are different from the true relations \(\vctr{y}_i\) linking the two entities, the parameters \(\vctr{\theta}\) are adjusted such that \(\vctr{z}\) covers all relations in \(\vctr{y}_i\).
+    \label{alg:relation extraction:multir}
+  }
+\end{algorithm}
+\begin{equation*}
+  \vctr{z}^*\gets \argmax_{\vctr{z}} P(\vctr{z}\mid \vctr{x}_i, \vctr{y}_i; \vctr{\theta}).
+\end{equation*}
+This can be reframed as a weighted edge-cover problem, where the edge weights are given by \(\vctr{\phi}^\text{extract}(z_i, x_i; \vctr{\theta})\).
+The MultiR training algorithm can be seen as maximizing the likelihood \(P(\vctr{y}\mid \vctr{x}; \vctr{\theta})\) where a Viterbi approximation was used---the expectations being replaced with maxima.
+
+The multi-instance multi-label (\textsc{miml}) phrase was introduced by \textcitex{miml}.
+Their approach is similar to that of MultiR except that they train a classifier for \(\vctr{\phi}^\text{join}\) instead of using a deterministic process.
+Their training procedure also differs.
+They train in the Bayesian framework using an expectation--maximization algorithm.
+In general, \textsc{miml} approaches are challenging to evaluate systematically since they suffer from low precision due to incomplete knowledge bases.
+In particular, they were not compared with traditional supervised approaches.
+For reference, \textcite{miml} compare the three methods mentioned in this section on the same datasets and observe that at the threshold at which recall goes over 30\%, the precision falls under 30\%.
+
+\subsection{Universal Schemas}
+\label{sec:relation extraction:universal schemas}
+Another important weakly-supervised model is the universal schema approach designed by \textcitex{universal_schemas}.
+In their setting, existing relations and surface forms linking two entities are considered to be of the same nature.
+Slightly departing from their terminology, we refer to the union of relations (\(\relationSet\)) and surface forms (\(\sentenceSet\)) by the term ``items'' (\(\itemSet=\relationSet\cup\sentenceSet\)) owing to their similarity with the collaborative filtering concept.
+\Textcite{universal_schemas} consider that entity pairs are linked by items such that the dataset available could be referred to as \(\dataSet_\itemSet\subseteq\entitySet^2\times\itemSet\).
+This can be obtained by taking the union of an unlabeled dataset \(\dataSet\) and a knowledge base \(\kbSet\).
+This dataset \(\dataSet_\itemSet\) can be seen as a matrix with entity pairs corresponding to rows and items corresponding to columns.
+With this in mind, relation extraction resembles collaborative filtering.
+Figure~\ref{fig:relation extraction:universal schema matrix} gives an example of this matrix that we will call \(\mtrx{M}\in\symbb{R}^{\entitySet^2\times\itemSet}\).
+
+\begin{figure}[ht!]
+  \centering
+  \input{mainmatter/relation extraction/universal schema.tex}
+  \scaption[Universal schema matrix.]{
+    Universal schema matrix.
+    Observed entity--item pairs are shown in green; blue cells are unobserved values, while orange cells are unobserved values for which a prediction was made.
+    The observed values on the left (surface forms) come from an unsupervised dataset \(\dataSet\), while the observed values on the right (relations) come from a knowledge base \(\kbSet\).
+    \label{fig:relation extraction:universal schema matrix}
+  }
+\end{figure}
+
+\Textcite{universal_schemas} purpose to model this matrix using a combination of three models.
+One of them is a low-rank matrix factorization:
+\begin{equation*}
+  m^\text{F}_{ei} = \sum_{j=0}^d u_{ej} v_{ij}
+\end{equation*}
+where \(d\) is a hyperparameter, and \(\mtrx{U}\in\symbb{R}^{\entitySet^2\times d}\) and \(\mtrx{V}\in\symbb{R}^{\itemSet\times d}\) are model parameters.
+The two other models are an inter-item neighborhood model and selectional preferences (described in Section~\ref{sec:context:selectional preferences}), which we do not detail here.
+Training such a model is difficult since we do not have access to negative facts: not observing a sample \((\vctr{e}, i)\not\in\dataSet_\itemSet\) does not necessarily imply that this sample is false.
+To cope with this issue, \textcite{universal_schemas} propose to use the Bayesian personalized ranking model (\textsc{bpr}, \citex{bpr}).
+Instead of enforcing each element \(m_{ei}\) to be equal to \(1\) or \(0\), \textsc{bpr} relies upon a ranking objective pushing elements observed to be true to be ranked higher than unobserved elements.
+This is done through a contrastive objective between observed positive samples and unobserved negative samples from a uniform distribution:
+\begin{equation*}
+  J_\textsc{us}(\vctr{\theta}) =
+    \sum_{(\vctr{e}^+,i)\in\dataSet_\itemSet}
+    \sum_{\substack{(\vctr{e}^-,i)\in\entitySet^2\times\itemSet\\(\vctr{e}^-,i)\not\in\dataSet_\itemSet}}
+    \log \sigma(m_{e^+i} - m_{e^-i})
+\end{equation*}
+This objective can be directly maximized using stochastic gradient ascent.
+\Textcite{universal_schemas} experiment on a \(\textsc{nyt}+\textsc{fb}\) dataset; this means that the unsupervised dataset \(\dataSet\) comes from the New York Times (\textsc{nyt}, Section~\ref{sec:datasets:nyt}) and the knowledge base \(\kbSet\) is Freebase (\textsc{fb}, Section~\ref{sec:datasets:freebase}).
+
+\subsection{Aggregate \textsc{pcnn} Extraction}
+\label{sec:relation extraction:pcnn aggregate}
+\textsc{pcnn} is a sentence-level feature extractor introduced in Section~\ref{sec:relation extraction:pcnn}.
+\Textcitex{pcnn} introduce the \textsc{pcnn} feature extractor together with a multi-instance learning algorithm.
+Given a bag of mentions \(\vctr{x}\in\dataSet^\vctr{e}\), for each mention \(x_i\in\vctr{x}\), they model \(P(\rndm{r}\mid x_i; \vctr{\theta})\).
+However, the optimization is done over each bag of mentions separately:
+\begin{align}
+  \symcal{L}_\textsc{pcnn}(\vctr{\theta}) & = - \sum_{(\vctr{x}, r)\in\dataSet^\vctr{e}_\relationSet} \log P(r\mid x^*; \vctr{\theta})
+  \label{eq:relation extraction:pcnn loss} \\
+  x^* & = \argmax_{x_i\in \vctr{x}} P(r\mid x_i; \vctr{\theta})
+  \label{eq:relation extraction:pcnn argmax}
+\end{align}
+In other words, for a set of mentions \(\vctr{x}\) of an entity pair, the network backpropagates only on the sample that predicts a relation with the highest certainty.
+Thus, \textsc{pcnn} is a multi-instance single-relation model: it assumes \hypothesis{multi-instance} but also \hypothesis{1-adjacency}.
+
+\Textcite{pcnn} continue to use the experimental setup of \textcite{miml}, i.e.~using a distantly supervised dataset, but complement it with a manual evaluation to have a better estimate of the precision.
+
+\Textcitex{pcnn_attention} improve the \textsc{pcnn} model with an attention mechanism over mentions to replace the \(\argmax\) of Equation~\ref{eq:relation extraction:pcnn argmax}.
+The attention mechanism's memory is built from the output of the \textsc{pcnn} on each mention without applying a softmax; the \textsc{pcnn} is simply used to produce a representation for each mention.
+Equations~\ref{eq:relation extraction:pcnn loss} and~\ref{eq:relation extraction:pcnn argmax} are then replaced by:
+\begin{align*}
+  \symcal{L}_\text{Lin}(\vctr{\theta}) & = - \sum_{(\vctr{x}, r)\in\dataSet^\vctr{e}_\relationSet} \log P(r\mid \vctr{x}; \vctr{\theta}) \\
+  P(r\mid \vctr{x}; \vctr{\theta}) & \propto \exp( \mtrx{W} \vctr{s}(\vctr{x}, r) + \vctr{b} ) \\
+  \vctr{s}(\vctr{x}, r) & = \sum_{x_i\in\vctr{x}} \alpha_i \operatorname{\textsc{pcnn}}(x_i)
+\end{align*}
+where the \(\alpha_i\) are attention weights computed from a bilinear product between the query \(r\) and the memory \(\operatorname{\textsc{pcnn}}(\vctr{x})\), similarly to the setup of Section~\ref{sec:context:attention}.
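+
+To make this aggregation over a bag concrete, here is a small NumPy sketch of the attention step; the mention representations stand in for the \textsc{pcnn} outputs, and the bilinear form and relation embedding are written in our own simplified notation.
+\begin{verbatim}
+import numpy as np
+
+def softmax(x):
+    e = np.exp(x - x.max())
+    return e / e.sum()
+
+def bag_representation(mentions, query, a):
+    """Attention over the mentions of one bag (selective attention).
+
+    mentions: (m, d) representations of the m mentions (e.g. PCNN outputs).
+    query: (d,) embedding of the candidate relation r.
+    a: (d, d) bilinear form scoring each mention against the query.
+    Returns the weighted bag representation s(x, r).
+    """
+    scores = mentions @ a @ query   # one scalar score per mention
+    alpha = softmax(scores)         # attention weights alpha_i
+    return alpha @ mentions         # convex combination of the mentions
+
+# Toy bag of 3 mentions with 4-dimensional representations.
+rng = np.random.default_rng(0)
+mentions = rng.normal(size=(3, 4))
+query = rng.normal(size=4)
+a = np.eye(4)                       # simple choice of bilinear form
+print(bag_representation(mentions, query, a).shape)  # (4,)
+\end{verbatim}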
+\Textcite{pcnn_attention} show that this modification improves the results of \textsc{pcnn}, this can be seen as a relaxation of \hypothesis{multi-instance}: the standard \textsc{pcnn} approach assumes that each fact in \(\kbSet\) is conveyed by a single sentence through its \(\argmax\); in contrast, the attention approach simply assumes that all facts are conveyed in \(\dataSet\), at least by one sentence but possibly by several ones. + +\subsection{Entity Pair Graph} +\label{sec:relation extraction:epgnn} +The multi-instance approach shares information at the entity pair level. +However, information could also be shared between different entity pairs. +This is the idea put forth by entity pair graph neural network (\textsc{epgnn}, \citex{epgnn}). +The basic sharing unit becomes the entity: when two mentions \((s, e_1, e_2), (s', e_1', e_2')\in\dataSet\) share at least one entity (\(\{e_1, e_2\}\cap\{e_1', e_2'\}\neq\emptyset\)), their features interact with each other in order to make a prediction. +The sharing of information is made following an entity pair graph that links together bags of mentions with a common entity as illustrated in Figure~\ref{fig:relation extraction:entity pair graph}. + +\begin{figure}[ht!] + \centering + \input{mainmatter/relation extraction/entity pair graph.tex} + \scaption[Entity pair graph.]{ + Entity pair graph. + Each node corresponds to a bag of mentions, each edge of the graph corresponds to an entity in common between the two bags, the edges are labeled with the shared entity. + For illustration purpose, we show a single sample per bag. + This example is from the SemEval 2010 Task 8 dataset (described in Section~\ref{sec:datasets:semeval}). + All sentences convey the \textsl{entity-destination} relation. + \label{fig:relation extraction:entity pair graph} + } +\end{figure} + +To obtain a distributed representation for a sentence, \textsc{epgnn} uses \textsc{bert} (Section~\ref{sec:context:transformers}). +More precisely, it combines the embedding of the \textsc{cls} token% +\sidenote{ + As a reminder, the \textsc{cls} token is the marker for the beginning of the sentence, its embedding purposes to represent the whole sentence. +} +with the embeddings corresponding to the two entities through a mean pooling. +The sentence feature extraction architecture is illustrated by Figure~\ref{fig:relation extraction:epgnn sentence representation}. +This is one of several methods to obtain an entity-aware fixed-size representation of a tagged sentence; other approaches are developed in Section~\ref{sec:relation extraction:mtb sentential}. + +\begin{figure}[ht!] + \centering + \input{mainmatter/relation extraction/epgnn sentence representation.tex} + \scaption[\textsc{epgnn} sentence representation.]{ + \textsc{epgnn} sentence representation. + ``Bentham'' was split into two subword tokens, ``Ben-'' and ``-tham'' by the \textsc{bpe} algorithm described in Section~\ref{sec:context:bpe}. + The contextualized embeddings of most words are ignored. + The final representation is only built using the entities span and the \textsc{cls} token. + Not appearing on the figure are linear layers used to post-process the output of the mean poolings and the final representation as well as a \(\ReLU\) non-linearity. + Compare to Figure~\ref{fig:relation extraction:emes}. + \label{fig:relation extraction:epgnn sentence representation} + } +\end{figure} + +Given a vector representation for each sentence in the dataset, we can label the vertices of the entity pair graph. 
+A spectral graph convolutional network (\textsc{gcn}, Section~\ref{sec:graph:spectral gcn}) is then used to aggregate the information of its neighboring samples into each vertex. +Thus, \textsc{epgnn} produces two representations for a sample: one sentential and one topological. +From these two representations, a prediction is made using a linear and softmax layer. +Since a single relation is produced for each sample, \textsc{epgnn} is trained using the usual classification cross-entropy loss. +More details on graph-based approaches are given in Chapter~\ref{chap:graph}. + +\Textcite{epgnn} evaluate \textsc{epgnn} on two datasets, SemEval~2010 Task~8 (Section~\ref{sec:datasets:semeval}) and \textsc{ace}~2005 (Section~\ref{sec:datasets:ace}). +Reaching a half-directed macro-\(\overHalfdirected{\fone}\) of 90.2\% on the first one, and a micro-\fone{} of 77.1\% on the second. diff --git a/mainmatter/relation extraction/bootstrap algorithm.tex b/mainmatter/relation extraction/bootstrap algorithm.tex @@ -0,0 +1,16 @@ +\begin{algorithmic} + \Function{bootstrap}{} + \FunctionInputs{} \(\dataSet\) unlabeled dataset + \FunctionInputs*{} \(O\) or \(R\) seed + \FunctionOutputs{} \(O\) occurrences + \FunctionOutputs*{} \(R\) rules + \State + \State Start with either \(O\) or \(R\) + \Loop + \State \(O\gets \{x\in \dataSet\mid R \text{ matches on } x\}\) + \State \(R\gets \text{induce rules from}\) + \State \hphantom{\(R\gets\)} occurrences \(O\) + \EndLoop + \State \Output \(O, R\) + \EndFunction +\end{algorithmic} diff --git a/mainmatter/relation extraction/chapter.tex b/mainmatter/relation extraction/chapter.tex @@ -0,0 +1,22 @@ +\chapter{Relation Extraction} +\label{chap:relation extraction} +\begin{epigraph} + {Augustus De Morgan} + {\citetitle{demorgan_syllogism3}} + {\cite*[p.~203]{demorgan_syllogism3}} + \kern2mm% Sadly unable to fix this underful hbox gracefuly, adding space between the quotes and the When seems to give the best result. + When two objects, qualities, classes, or attributes, viewed together by the mind, are seen under some connexion, that connexion is called a relation. +\end{epigraph} +\begin{epigraph} + {Maciej Cegłowski} + {\citetitle{constraints_design}} + {\cite*{constraints_design}} + Hard constraints are the midwife to good design. 
+\end{epigraph} +\input{mainmatter/relation extraction/introduction.tex} +\input{mainmatter/relation extraction/definition.tex} +\input{mainmatter/relation extraction/supervision.tex} +\input{mainmatter/relation extraction/sentential.tex} +\input{mainmatter/relation extraction/aggregate.tex} +\input{mainmatter/relation extraction/unsupervised.tex} +\input{mainmatter/relation extraction/conclusion.tex} diff --git a/mainmatter/relation extraction/clustering metrics.tex b/mainmatter/relation extraction/clustering metrics.tex @@ -0,0 +1,44 @@ +\begin{tikzpicture}[ + sample/.style={minimum width=3mm, inner sep=0mm}, + goldA/.style={sample, fill=Dark2-A, circle}, + goldB/.style={sample, fill=Dark2-B, regular polygon, regular polygon sides=3}, + goldC/.style={sample, fill=Dark2-C, star, star points=5}, + ] + + \draw[rounded corners=1mm] (0, 0) -- (35mm, 0) -- (35mm, -9mm) -- (0, -9mm) -- cycle; + \draw (11mm, 0) -- (11mm, -9mm); + \draw (23mm, 0) -- (23mm, -9mm); + + \node[goldC] at (3mm, -5mm) {}; + \node[goldA] at (6mm, -7mm) {}; + \node[goldA] at (7mm, -2mm) {}; + + \node[goldC] at (16mm, -2mm) {}; + \node[goldB] at (18mm, -6mm) {}; + \node[goldB] at (20mm, -2mm) {}; + + \node[goldC] at (27mm, -6mm) {}; + \node[goldC] at (29mm, -2mm) {}; + \node[goldC] at (32mm, -4mm) {}; + + \begin{scope}[shift={(0,-15mm)}] + \draw[rounded corners=1mm] (0, 0) -- (35mm, 0) -- (35mm, -9mm) -- (0, -9mm) -- cycle; + \draw (11mm, 0) -- (11mm, -9mm); + \draw (23mm, 0) -- (23mm, -9mm); + + \node[goldA] at (3mm, -5mm) {}; + \node[goldA] at (7mm, -7mm) {}; + \node[goldB] at (6mm, -3mm) {}; + + \node[goldA] at (16mm, -2mm) {}; + \node[goldA] at (15mm, -6mm) {}; + \node[goldB] at (20mm, -4mm) {}; + + \node[goldC] at (27mm, -5mm) {}; + \node[goldC] at (29mm, -2mm) {}; + \node[goldC] at (32mm, -6mm) {}; + \end{scope} + + \node[anchor=east] at (15mm, -12mm) {\bcubed{} \rotatebox{90}{\(<\)}}; + \node[anchor=west] at (20mm, -12mm) {\rotatebox{90}{\(>\)} V-measure}; +\end{tikzpicture} diff --git a/mainmatter/relation extraction/conclusion.tex b/mainmatter/relation extraction/conclusion.tex @@ -0,0 +1,21 @@ +\section{Conclusion} +\label{sec:relation extraction:conclusion} +In this chapter, we introduced the relation extraction tasks (Section~\ref{sec:relation extraction:definition}) and the different supervision schema with which we can tackle them (Section~\ref{sec:relation extraction:supervision}). +As we showed, the development of supervised relation extraction models closely followed the evolution of \textsc{nlp} models introduced in Section~\ref{sec:context:sentence}. +This is particularly visible in Section~\ref{sec:relation extraction:sentential}, which follows the progress of sentential relation extraction approaches. +Furthermore, the expansion of the scale at which problems are tackled is visible both on the \textsc{nlp} side with the word-level to sentence-level evolution and on the information extraction side with the sentential to aggregate extraction evolution. +The aggregate models, which are more aligned with the information extraction field, are presented in Section~\ref{sec:relation extraction:aggregate}. +Within these models, we also see the evolution from the simple max-pooling of \textsc{miml} (Section~\ref{sec:relation extraction:miml}) toward more sophisticated approaches which model the topology of the dataset more finely (Section~\ref{sec:relation extraction:epgnn}). + +We limited our presentation of supervised models to those critical to the development of unsupervised models. 
+Several recent approaches propose to reframe supervised relation extraction---and other tasks---as language modeling \parencitex{t5}[-9mm] or question answering \parencitex{span_prediction}[-2mm] tasks. +Since these approaches were not explored in the unsupervised setup yet, we omit them from our related work. + +Finally, Section~\ref{sec:relation extraction:unsupervised} focused on the specific setup of interest to this thesis: unsupervised relation extraction. +This setup is particularly complex due to the discrepancy between the expressiveness of our supervised models and the weakness of the semantic signal we are seeking to extract. +As we saw, modeling hypotheses are central to tackling this problem. +Early models, including supervised ones, relied on strong hypotheses to facilitate training. +However, while supervised models can now use deep neural networks without any hypothesis other than the unbiasedness of their data, unsupervised models still need to rely on strong assumptions. + +In the next section, we focus on unsupervised discriminative models, in particular the \textsc{vae} model presented in Section~\ref{sec:relation extraction:vae}. +In particular, we propose better losses for enforcing \hypothesis{uniform}, which avoid problematic degenerate solutions of the clustering relation extraction task. diff --git a/mainmatter/relation extraction/definition.tex b/mainmatter/relation extraction/definition.tex @@ -0,0 +1,239 @@ +\section{Task Definitions} +\label{sec:relation extraction:definition} +The relation extraction task was shaped by several datasets with different goals. +The first \textsc{muc}s focused on detecting naval sightings and engagement in military messages. +Subsequent conferences moved towards the extraction of business-related relations in news reports. +Nowadays, general encyclopedic knowledge is usually extracted from either news reports or encyclopedia pages. +Another common goal is to extract drugs, chemical and symptoms interactions in biomedical texts~\parencite{biobert}. +For further details, Appendix~\ref{chap:datasets} contains a list of datasets with information about the source of the text and the nature of the relations to be extracted. +Depending on the end-goal for which relation extraction is used, different definitions of the task might be more fitting. +We now formally define the relation extraction task and explore its popular variants. + +\begin{marginparagraph} + For ease of notation, we changed the placement of entities in the tuple corresponding to a fact from the one used in Section~\ref{sec:context:knowledge base}. + This will allow us to refer to the entity pair as \(\vctr{e}\in\entitySet^2\). +\end{marginparagraph} +In relation extraction, we assume that information can be represented as a knowledge base \(\kbSet\subseteq\entitySet^2\times\relationSet\) as defined in Section~\ref{sec:context:knowledge base}. +In addition to the set of entities \(\entitySet\) and the set of relations \(\relationSet\), we need to define the source of information from which to extract relations. +The information source can come in several different forms, but we use a single basic definition on sentences which we can refine later on. +We assume entity chunking was performed on our input data. +We only deal with binary relations% +\sidenote[][-1cm]{ + As described in Section~\ref{sec:context:relation algebra}, this means that only relations between two entities are considered. + Moreover, higher-arity relations can be decomposed into sets of binary ones. 
+}
+since they are the ones commonly encoded in knowledge bases.
+We can therefore define \(\sentenceSet\) as a set of sentences with two tagged and ordered entities:
+\begin{align*}
+  \sentenceSet = \{ & \text{``\uhead{Jan Kasl} became mayor of \utail{Prague}.''},\\
+                    & \text{``\utail{Vincent Callebaut} was born in 1977 in \uhead{Belgium}.''},\\
+                    & \dotsc\}.
+\end{align*}
+\begin{marginparagraph}
+  Relation extraction can also be performed on semi-structured documents, such as a Wikipedia page with its infobox or an \textsc{html} page that might contain lists and tables.
+  This is the case of \textsc{dipre} presented in Section~\ref{sec:relation extraction:dipre}.
+  As long as the semi-structured data can be represented as a token list, standard text models can still be applied.
+\end{marginparagraph}
+In this example, two sentences are given; in each sentence, the relation we seek is the one between the two entities marked by underlines.
+The entities need to be ordered since most relations are asymmetric (\(r\neq\breve{r}\)).
+In practice, this means that one entity is tagged as \(e_1\) and the other as \(e_2\).
+The standard setting is to work on sentences; this can of course be generalized to larger chunks of text if needed.
+
+The tagged entities inside the sentences of \(\sentenceSet\) are not the same as entities in knowledge bases.
+They are merely surface forms.
+These surface forms are not sensu stricto elements of \(\entitySet\).
+Indeed, the same entity can have several different surface forms, and the same surface form can be linked to several different entities depending on context.
+To map these tagged surface forms to \(\entitySet\), entity linking is usually performed on the corpus.
+In practice, this means that we consider samples from \(\sentenceSet\times\entitySet\times\entitySet\).
+Finally, since the two tagged entities are ordered, we simply assume that the first entity in the tuple corresponds to the entity tagged \(e_1\) in the sentence, while the second entity refers to \(e_2\).%
+\sidenote{Note that \(e_2\) can appear before \(e_1\) in the sentence.}
+If entity linking is not performed on the dataset, we can simply assume that the surface forms are actually entities; in this case, and in this case alone, \(\entitySet\) is a set of surface forms.
+This is somewhat uncommon, the standard practice being to have linked entities.
+
+Also, note that this setup is still valid for sentences with three or more entities, as we can consider all possible entity pairs:
+\begin{align*}
+  \sentenceSet = \{ & \parbox[t]{10cm}{``\uhead{Alonzo Church} was born on June 14, 1903, in \utail{Washington, D.C.}, where his father, Samuel Robbins Church, was the judge of the Municipal Court for the District of Columbia.'',}\\
+                    & \parbox[t]{10cm}{``\utail{Alonzo Church} was born on June 14, 1903, in Washington, D.C., where his father, \uhead{Samuel Robbins Church}, was the judge of the Municipal Court for the District of Columbia.'',}\\
+                    & \dotsc\}.
+\end{align*}
+In this example, we give two elements from \(\sentenceSet\); these elements are different since their markings \(\uent{\quad}\) differ.
+We often use the word sentence without qualifications to refer to elements from \(\sentenceSet\).
+Still, even though the two sentences above are the same in the familiar sense of the term, they are different in our definition.
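+
+For concreteness, an element of \(\sentenceSet\times\entitySet^2\) can be thought of as a small record, as in the following Python sketch; the field names and the entity identifiers are placeholders of our own choosing, not part of any dataset format.
+\begin{verbatim}
+from dataclasses import dataclass
+from typing import Tuple
+
+@dataclass(frozen=True)
+class Sample:
+    """A sentence with two tagged, ordered, and linked entities."""
+    tokens: Tuple[str, ...]     # the tokenized sentence
+    head_span: Tuple[int, int]  # token span tagged e1
+    tail_span: Tuple[int, int]  # token span tagged e2
+    head_entity: str            # linked knowledge-base identifier of e1
+    tail_entity: str            # linked knowledge-base identifier of e2
+
+s1 = Sample(("Jan", "Kasl", "became", "mayor", "of", "Prague", "."),
+            (0, 2), (5, 6), "JAN_KASL", "PRAGUE")
+# Swapping the tags yields a different element, even though the
+# underlying sentence is the same.
+s2 = Sample(s1.tokens, s1.tail_span, s1.head_span,
+            s1.tail_entity, s1.head_entity)
+print(s1 == s2)  # False
+\end{verbatim}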
+
+Now, given a sentence with two tagged, ordered, and linked entities, we can state the goal of relation extraction as finding the semantic relation linking the two entities as conveyed by the sentence.
+Since the set of possible relations is designated by \(\relationSet\), we can sum up the relation extraction task as finding a mapping taking the form:
+\begin{equation}
+  \boxed{
+    f_\text{sentential}\colon \sentenceSet\times\entitySet^2 \to \relationSet
+  }
+  \label{eq:relation extraction:sentential definition}
+\end{equation}
+
+When we have access to a supervised dataset, all the information (head entity, relation, tail entity, conveying sentence) is provided.
+Table~\ref{tab:relation extraction:supervised samples} gives some examples of supervised samples.
+We denote a dataset of sentences with tagged, ordered, and linked entities as \(\dataSet\subseteq\sentenceSet\times\entitySet^2\) and a supervised dataset as \(\dataSet_\relationSet\subseteq\dataSet\times\relationSet\).
+Given an entity pair \(\vctr{e}=(e_1, e_2)\), a sample in which these entities appear \((s, e_1, e_2)\) is called a \emph{mention}.
+A sample which conveys a fact \tripletHolds{e_1}{r}{e_2} is called an \emph{instance} of \(r\).
+\begin{marginparagraph}
+  Mentions as defined here can be called ``entity mentions,'' while instances may be referred to as ``relation mentions.''
+\end{marginparagraph}
+
+\begin{table}
+  \input{mainmatter/relation extraction/supervised samples.tex}
+  \scaption[Example of supervised samples from the FewRel dataset]{
+    Samples from the FewRel dataset.
+    The surface forms in the head, relation and tail columns are only given for ease of reading and are usually not provided.
+    \label{tab:relation extraction:supervised samples}
+  }
+\end{table}
+
+The relation extraction task as stated by Equation~\ref{eq:relation extraction:sentential definition} is called \emph{sentential extraction}.
+It is the traditional relation extraction setup: the sentences are considered one by one, and a relation is predicted for each sentence separately.
+However, information can be leveraged from the regularities of the dataset itself.
+Indeed, some facts can be repeated in multiple sentences, in which case a model could enforce some kind of consistency on its predictions.
+Even beyond a simple consistency of the relations predicted, in the same fashion that a word can be defined by its context, so can an entity.
+This kind of regularity can be exploited by modeling a dependency between samples even when conditioned on the model parameters.
+While tackling relation extraction at the sentence level might be sufficient for some datasets, others might benefit from a larger context, especially when the end goal is to build a knowledge base containing general facts.
+This gives rise to the \emph{aggregate extraction} setting, in which a set of tagged sentences is directly mapped to a set of facts without a direct correspondence between individual sentences and individual facts.
+\begin{marginparagraph}
+  The left-hand side of Equation~\ref{eq:relation extraction:aggregate definition} is a subset of \(\sentenceSet\times\entitySet^2\), that is \(\dataSet\) or a subset thereof.
+  On the right-hand side, we have a subset of \(\entitySet^2\times\relationSet\); we intend to find \(\kbSet\) or a subset thereof.
+  However, each individual sample \((s, \vctr{e})\in\dataSet\) does not need to be mapped to an individual fact \((\vctr{e}, r)\in\kbSet\).
+\end{marginparagraph} +\begin{equation} + \boxed{ + f_\text{aggregate}\colon 2^{\sentenceSet\times\entitySet^2} \to 2^{\entitySet^2\times\relationSet} + } + \label{eq:relation extraction:aggregate definition} +\end{equation} +Quite often in this case, the problem is tackled at the level of entity pairs, meaning that instead of making a prediction from a sample in \(\sentenceSet\times\entitySet^2\), the prediction is made from \(2^\sentenceSet\times\entitySet^2\). +This setup is required for multi-instance approaches presented in Section~\ref{sec:relation extraction:miml}. +Aggregate extraction may impose a relatively more transductive approach% +\sidenote{ + Transductive approaches are contrasted to inductive approaches. + In the inductive approach---such as neural networks---parameters \(\vctr{\theta}\) are estimated from the training set. + When labeling on an unknown sample, the model makes its prediction only from parameters \(\vctr{\theta}\) and the unlabeled sample, access to the training set is no longer necessary. + This is called induction since ``rules'' (\(\vctr{\theta}\)) are obtained from examples. + On the other hand,% + \unskip\parfillskip 0pt% XXX Too lazy to properly handle sidenote page break +} +since predictions rely directly on previously observed samples. +Usually, aggregate models still extract some form of prediction at the sentence level, even if they do not need to. +Therefore, the key point of aggregate approaches is the explicit handling of dataset-level information. +Some models may heavily depend on this global information, to the point that they cannot be trained without some form of repetition in the dataset. +The sentential--aggregate distinction constitutes a spectrum. +While all unsupervised methods exhibit some aggregate traits, they do not necessarily exploit as much structural information as they could; this is the key point of Chapter~\ref{chap:graph}. + +\subsection{Nature of Relations} +\begin{marginparagraph}[-9mm]% XXX Remainder of the Transductive approaches \sidenote + in the transductive approach--such as \textsc{k-nn}---observations on the train set are directly transferred to test samples without first generalizing to a set of rules. +\end{marginparagraph} +The supervised relation extraction task described above is quite generic. +The approaches to tackle it in practice vary quite a lot depending on the specific nature of the facts we seek to extract and the corpus structure. +In this subsection, we present some variations on the nature of \(\relationSet\) commonly encountered in the literature. + +\subsubsection{Unspecified Relation: \textsl{Other}} +\label{sec:relation extraction:other} +The set \(\relationSet\) is built using a finite set of labels. +These labels do not describe the relationship between all entities in all possible sentences. +Indeed some entities are deemed unrelated in some sentences. +A distinction is sometimes made between relation extraction and relation detection, depending on whether a relation is assumed to exist between the two entities in a sentence or not. +This apparent absence of relation is often called ``\textsl{other},'' since a relation between the two entities might exist but is simply not present in the relation schema considered~\parencitex{semeval2010task8}. 
+In this case, we can still use the usual relation extraction setup by augmenting \(\relationSet\) with the following relation: +\begin{marginparagraph} + We use the notation of Section~\ref{sec:context:relation algebra} where \(\bar{r}\) refers to the complementary relation of a named relation \(r\) in the schema \(\relationSet\). + Note that using the definition of relations as a set of entity pairs is not strictly correct here since two entities may be linked by a relation that is simply not conveyed by a specific sentence containing them. + The underlying problem behind this notational conundrum is the fact that \textsl{other} is only needed for mono-relation extraction when one and exactly one relation must be predicted for a sample; see Section~\ref{sec:relation extraction:miml} for an alternative. + The definition given in Equation~\ref{eq:relation extraction:other} is nonetheless fitting for the widespread distant supervision setting, which we describe in Section~\ref{sec:relation extraction:distant supervision}. +\end{marginparagraph} +\begin{equation} + \textsl{other} = \bigcap_{r\in\relationSet} \bar{r}. + \label{eq:relation extraction:other} +\end{equation} +However, note that ``\textsl{other}'' is not a relation like the others: it is defined by what it is not instead of being defined by what it is. +This peculiarity calls for special care in how it is handled, especially during evaluation. + +\subsubsection{Closed-domain Assumption} +\label{sec:relation extraction:domain restriction} +As stated above, the set \(\relationSet\) is usually built from a finite set of labels such as \textsl{parent of} and \textsl{part of}. +This is referred to as the \emph{closed-domain assumption}. +Another approach is to consider that \(\relationSet\) is not known beforehand~\parencitex{oie}. +In particular, open information extraction (\textsc{oie}, Section~\ref{sec:relation extraction:oie}) directly uses surface forms as relation labels. +In this case, the elements of \(\relationSet\) are strings of words, not defined in advance, and potentially not even finite. +We can see \textsc{oie} as a preliminary task to relation extraction: the set of surface forms can be mapped to a traditional closed set of labels. +When \(\relationSet\) is not known beforehand, the relation extraction problem can be called \emph{open-domain relation discovery}. +This is the usual setup for unsupervised relation extraction described in Section~\ref{sec:relation extraction:unsupervised}. + +\subsubsection{Directionality and Ontology} +\label{sec:relation extraction:directionality} +Most relations \(r\) are not symmetric (\(r\neq\breve{r}\)). +There are several different approaches to handle this asymmetry. +In the SemEval 2010 Task 8 dataset (Section~\ref{sec:datasets:semeval}), the first entity in the sentence is always tagged \(e_1\), and the second is always tagged \(e_2\). +The relation set \(\relationSet\) is closed under the converse operation~\parencite{semeval2010task8}: +\begin{equation*} + \forall r\in\relationSet: \breve{r}\in\relationSet. +\end{equation*} +This is the most common setup. +In this case, the relation labels incorporate the directionality; for example, the SemEval dataset contains both \(\textsl{cause--effect}(e_1, e_2)\) and \(\textsl{cause--effect}(e_2, e_1)\) depending on whether the first entity appearing in the sentence is the cause or the effect. +This means that given a relation \(r\in\relationSet\) in the SemEval dataset, we can easily query the corresponding \(\breve{r}\).
+On the other hand, the relation set of the FewRel dataset (Section~\ref{sec:datasets:fewrel}) is not closed under the converse operation~\parencitex{fewrel}. +Furthermore, it is a mono-relation dataset without \textsl{other}. +This means that all samples \((s, e_1, e_2)\in\dataSet\) convey a relation between \(e_1\) and \(e_2\). +Naturally, in this case, the entity tagged \(e_2\) may appear before the one tagged \(e_1\). +And indeed, for relations that do not have their converse in \(\relationSet\), the same sentence \(s\) with the tags reversed may not appear in the FewRel dataset since this would need to be categorized as \(\breve{r}\not\in\relationSet\). + +In general, the order of \(e_1\) and \(e_2\) is not fixed. +This is particularly true in the open-domain relation setup, where \(\relationSet\), being unknown, cannot be equipped with the converse operation. +In this case, it is common to feed the samples in both arrangements: with the first entity tagged \(e_1\) and the second \(e_2\), and the reverse: with the first entity tagged \(e_2\) and the second \(e_1\). +This can be seen as a basic data augmentation technique. + +More generally, the relation set \(\relationSet\) might possess a structure called a \emph{relation ontology}. +This is especially true when \(\relationSet\) comes from a knowledge base such as Wikidata~\parencite{wikidata}. +In this case, \(\relationSet\) can be equipped with several operations other than the converse one. +For example, Wikidata endows \(\relationSet\) with a subset operation: the relation \textsl{parent organization} \wdrel{749} is recorded as a subset of \textsl{part of} \wdrel{361}, such that \(\sfTripletHolds{e_1}{parent organization}{e_2} \implies \sfTripletHolds{e_1}{part of}{e_2}\), or using the notation of Section~\ref{sec:context:relation algebra}: \(\textsl{parent organization} \relationOr \textsl{part of} = \textsl{part of}\). + +\subsection{Nature of Entities} +\label{sec:relation extraction:entity} +The approach taken to tackle the relation extraction task also depends quite heavily on the nature of entities. +In particular, an important distinction must be made depending on whether the \emph{unique referent assumption} is postulated. +This has been the case in most examples given thus far. +For instance, ``Alan Turing'' designates a single human being, even if several people share this name; we only designate one of them with the entity \wdent{7251} ``Alan Turing.'' +However, this is not always the case, for example, in the following sample from the SemEval 2010 Task 8 dataset: +\begin{marginparagraph} + SemEval 2010 Task 8 is one of those datasets without entity linking, which is rather common when dealing with non-unique referents. +\end{marginparagraph} +\begin{indentedexample} + The \uhead{key} was in a \utail{chest}.\\ + Relation: \(\textsl{content--container}(e_1, e_2)\) +\end{indentedexample} +In this case, the entities ``key'' and ``chest'' do not always refer to the same object. +The relation holds in the small world described by this sentence, but it does not always hold for every object designated by ``key''. +This is closely related to the granularity of entity linking. +Indeed, one could link the surface form ``key'' above with an entity designating this specific key, but this is not always the case, as exemplified by the SemEval 2010 Task 8 dataset. +This distinction is pertinent to the relation extraction task, especially in the aggregate setting.
+When applied to entities with a unique referent, the \(\textsl{content--container}(e_1, e_2)\) relation is \(N\to 1\) or at least transitive. +However, when the unique referent assumption is false, this relation is not \(N\to 1\) anymore since several ``key'' entities can refer to different objects located in different containers. + +\begin{marginparagraph}[-14mm] + The aggregate setup is not necessarily contradictory with the unique referent assumption. + Even though not all ``keys'' are in a ``chest,'' this fact still gives us some information about ``keys,'' in particular, that they can be in a ``chest,'' which is not the case for all entities. +\end{marginparagraph} +The unique referent assumption is not binary; the distinction is quite fuzzy in most cases. +Should the entity \wdent{142} ``France'' refer both to the modern country and to the twelfth-century kingdom? +What about the West Frankish Kingdom? +How should we draw the distinction? +Rather than categorizing models according to whether they take the unique referent assumption for granted, we should instead look at their capacity to capture the kind of relationship between a key and a chest as conveyed by the above sample. + +\begin{marginparagraph} + More generally, all the usual properties of grammatical nouns can lead to variations of the relation extraction task. + For example, many models focus on rigid designators such as ``Lucius Junius Brutus,'' which are opposed to flaccid designators such as ``founder of the Roman Republic.'' + Both refer to the same person \wdent{223440}. + However, it is possible to imagine a world where the ``founder of the Roman Republic'' does not refer to \wdent{223440}. + On the contrary, if \wdent{223440} exists, ``Lucius Junius Brutus'' ought to refer to him. +\end{marginparagraph} + +Finally, another variation of the definition of entities commonly encountered in relation extraction comes from coreference resolution. +Some datasets resolve pronouns such that in the sentence ``\uent{She} died in Marylebone,'' the word ``she'' can be considered an entity linked to \wdent{7259} ``Ada Lovelace'' if the context in which the sentence appears supports this. +In this case, the surface form of the entity gives little information about the nature of the entity. +This can be problematic for models relying too heavily on entities' surface forms. +In particular, early relation extraction models did not have access to entity identifiers; at the time, pronoun entities were avoided altogether.
diff --git a/mainmatter/relation extraction/dependency tree.tex b/mainmatter/relation extraction/dependency tree.tex @@ -0,0 +1,38 @@ +\begin{tikzpicture}[ + word after/.style={right=0.9mm of #1, inner sep=0}, + punctuation after/.style={right=0mm of #1, inner sep=0}, + POS/.style={inner sep=0.4mm}, + node to word/.style={dashed}, + arc label/.style={pos=0.5,sloped,above}, + ] + \node[inner sep=0] (john) {\strut John}; + \node[word after=john] (found) {\strut found}; + \node[word after=found] (a) {\strut a}; + \node[word after=a] (solution) {\strut solution}; + \node[word after=solution] (to) {\strut to}; + \node[word after=to] (the) {\strut the}; + \node[word after=the] (problem) {\strut problem}; + \node[punctuation after=problem] (period) {\strut .}; + + \coordinate (lvl0) at (0, 3); + \coordinate (lvl1) at (0, 2.25); + \coordinate (lvl2) at (0, 1.5); + \coordinate (lvl3) at (0, 0.75); + + \node[POS] (njohn) at (lvl1-|john) {\texttt{N}}; + \node[POS] (nfound) at (lvl0-|found) {\texttt{V}}; + \node[POS] (na) at (lvl2-|a) {\texttt{Det}}; + \node[POS] (nsolution) at (lvl1-|solution) {\texttt{N}}; + \node[POS] (nthe) at (lvl3-|the) {\texttt{Det}}; + \node[POS] (nproblem) at (lvl2-|problem) {\texttt{N}}; + + \foreach \word in {john, found, a, solution, the, problem}{ + \draw[node to word] (\word) -- (n\word); + } + + \draw[arrow] (nfound) -- (njohn) node[arc label] {\texttt{subj}}; + \draw[arrow] (nfound) -- (nsolution) node[arc label] {\texttt{obj}}; + \draw[arrow] (nsolution) -- (na) node[arc label] {\texttt{det}}; + \draw[arrow] (nsolution) -- (nproblem) node[arc label] {\texttt{to}}; + \draw[arrow] (nproblem) -- (nthe) node[arc label] {\texttt{det}}; +\end{tikzpicture} diff --git a/mainmatter/relation extraction/dipre split.tex b/mainmatter/relation extraction/dipre split.tex @@ -0,0 +1,18 @@ +\begin{tikzpicture}[ + word after/.style={right=0.9mm of #1, inner sep=0}, + defbrace/.style={decorate,decoration={brace,amplitude=5}}, + underdef/.style={midway,anchor=north,yshift=-1mm}, + overdef/.style={midway,anchor=south,yshift=1mm} + ] + \node[inner sep=0] (prefix) {\strut \texttt{<li><b>}}; + \node[word after=prefix] (e1) {\strut \textsc{title}}; + \node[word after=e1] (infix) {\strut \texttt{</b>} by}; + \node[word after=infix] (e2) {\strut \textsc{author}}; + \node[word after=e2] (suffix) {\strut \ (\ \null}; + + \draw[defbrace] (prefix.south east) -- (prefix.south west) node [underdef] {\strut prefix}; + \draw[defbrace] (infix.south east) -- (infix.south west) node [underdef] {\strut infix}; + \draw[defbrace] (suffix.south east) -- (suffix.south west) node [underdef] {\strut suffix}; + \draw[defbrace] (e1.north west) -- (e1.north east) node [overdef] {\strut \(e_1\)}; + \draw[defbrace] (e2.north west) -- (e2.north east) node [overdef] {\strut \(e_2\)}; +\end{tikzpicture} diff --git a/mainmatter/relation extraction/emes.tex b/mainmatter/relation extraction/emes.tex @@ -0,0 +1,60 @@ +\begin{tikzpicture}[ + word after/.style={right=0.9mm of #1, inner sep=0}, + token after/.style={right=0mm of #1, inner sep=0}, + vector/.style={draw, anchor=south, rectangle, minimum width=1.5mm, inner sep=0mm, minimum height=6mm}, + row vector/.style={vector, minimum height=1.5mm, minimum width=6mm}, + baseline=(fmid.north), + ] + \node[inner sep=0] (cls) {\strut \textsc{cls}}; + \node[word after=cls] (be1) {\strut \texttt{<e1>}}; + \node[word after=be1] (jeremy) {\strut Jeremy}; + \node[word after=jeremy] (ben) {\strut Ben}; + \node[token after=ben] (tham) {\strut tham}; + \node[word after=tham] (ee1) 
{\strut \texttt{</e1>}}; + \node[word after=ee1] (was) {\strut was}; + \node[word after=was] (born) {\strut born}; + \node[word after=born] (in) {\strut in}; + \node[word after=in] (be2) {\strut \texttt{<e2>}}; + \node[word after=be2] (london) {\strut London}; + \node[word after=london] (ee2) {\strut \texttt{</e2>}}; + \node[word after=ee2] (period) {\strut .}; + \node[word after=period] (eos) {\strut \textsc{eos}}; + + \coordinate (bertsw) at ($(cls.north west) + (0,5mm)$); + \coordinate (bertne) at ($(eos.north east) + (0,15mm)$); + \node[draw, rectangle, inner sep=0mm, fit=(bertsw) (bertne)] (bert) {}; + \node[anchor=center] at (bert.center) {\large\textsc{bert}}; + + \foreach \w/\focus in { + cls/\transparencyDefault, + be1/1, + jeremy/\transparencyDefault, + ben/\transparencyDefault, + tham/\transparencyDefault, + ee1/\transparencyDefault, + was/\transparencyDefault, + born/\transparencyDefault, + in/\transparencyDefault, + be2/1, + london/\transparencyDefault, + ee2/\transparencyDefault, + period/\transparencyDefault, + eos/\transparencyDefault + }{ + \begin{scope}[opacity=\focus] + \node[vector] (v\w) at ($(\w.north) + (0, 2cm)$) {}; + \draw[arrow] (\w) -- (\w|-bert.south); + \draw[arrow] (\w|-bert.north) -- (v\w); + \end{scope} + } + + \coordinate (outy) at ($(vbe1.north)!0.5!(vbe2.north)$); + \coordinate (outp) at (outy-|bert); + \coordinate (fmid) at ($(outp) + (0, 1.5mm)$); + + \node[row vector,anchor=east] (re1) at ($(outp) + (0, 5mm)$) {}; + \node[row vector,right=0mm of re1] (re2) {}; + + \draw[arrow,rounded corners=1mm] (vbe1) -- (vbe1|-fmid) -- (re1|-fmid) -- (re1); + \draw[arrow,rounded corners=1mm] (vbe2) -- (vbe2|-fmid) -- (re2|-fmid) -- (re2); +\end{tikzpicture} diff --git a/mainmatter/relation extraction/entity pair graph.tex b/mainmatter/relation extraction/entity pair graph.tex @@ -0,0 +1,9 @@ +\begin{tikzpicture}[sentence/.style={draw=black!30,rounded corners=1mm,align=left,text width=#1}] + \node[sentence=9cm] (ep1) {The trustees invested \textcolor{Dark2-A}{\uhead{money}} directly into \textcolor{Dark2-C}{\utail{funds}} made available by Newton Investment.}; + \node[sentence=4cm, above=3cm of ep1, anchor=east, xshift=-1cm] (ep2) {Huge \textcolor{Dark2-A}{\uhead{money}} is given to \textcolor{Dark2-B}{\utail{companies}} for boosting economy.}; + \node[sentence=45mm, above=2cm of ep1, anchor=west, xshift=1cm] (ep3) {Japan injected \textcolor{Dark2-C}{\uhead{funds}} into struggling \textcolor{Dark2-B}{\utail{companies}}.}; + + \draw[color=Dark2-A] (ep1) -- (ep2) node[midway, sloped, above,color=Dark2-A] {money}; + \draw[color=Dark2-B] (ep2) -- (ep3) node[midway, sloped, above,color=Dark2-B] {companies}; + \draw[color=Dark2-C] (ep3) -- (ep1) node[midway, sloped, above,color=Dark2-C] {funds}; +\end{tikzpicture} diff --git a/mainmatter/relation extraction/epgnn sentence representation.tex b/mainmatter/relation extraction/epgnn sentence representation.tex @@ -0,0 +1,70 @@ +\begin{tikzpicture}[ + word after/.style={right=0.9mm of #1, inner sep=0}, + token after/.style={right=0mm of #1, inner sep=0}, + vector/.style={draw, anchor=south, rectangle, minimum width=1.5mm, inner sep=0mm, minimum height=6mm}, + row vector/.style={vector, minimum height=1.5mm, minimum width=6mm}, + ] + \node[inner sep=0] (cls) {\strut \textsc{cls}}; + \node[word after=cls] (be1) {\strut \texttt{<e1>}}; + \node[word after=be1] (jeremy) {\strut Jeremy}; + \node[word after=jeremy] (ben) {\strut Ben}; + \node[token after=ben] (tham) {\strut tham}; + \node[word after=tham] (ee1) {\strut 
\texttt{</e1>}}; + \node[word after=ee1] (was) {\strut was}; + \node[word after=was] (born) {\strut born}; + \node[word after=born] (in) {\strut in}; + \node[word after=in] (be2) {\strut \texttt{<e2>}}; + \node[word after=be2] (london) {\strut London}; + \node[word after=london] (ee2) {\strut \texttt{</e2>}}; + \node[word after=ee2] (period) {\strut .}; + \node[word after=period] (eos) {\strut \textsc{eos}}; + + \coordinate (bertsw) at ($(cls.north west) + (0,5mm)$); + \coordinate (bertne) at ($(eos.north east) + (0,15mm)$); + \node[draw, rectangle, inner sep=0mm, fit=(bertsw) (bertne)] (bert) {}; + \node[anchor=center] at (bert.center) {\large\textsc{bert}}; + + \foreach \w/\focus in { + cls/1, + be1/\transparencyDefault, + jeremy/1, + ben/1, + tham/1, + ee1/\transparencyDefault, + was/\transparencyDefault, + born/\transparencyDefault, + in/\transparencyDefault, + be2/\transparencyDefault, + london/1, + ee2/\transparencyDefault, + period/\transparencyDefault, + eos/\transparencyDefault + }{ + \begin{scope}[opacity=\focus] + \node[vector] (v\w) at ($(\w.north) + (0, 2cm)$) {}; + \draw[arrow] (\w) -- (\w|-bert.south); + \draw[arrow] (\w|-bert.north) -- (v\w); + \end{scope} + } + + \coordinate (mean1c) at ($(vjeremy.north west)!0.5!(vtham.north east)$); + \node[draw, rectangle, anchor=south] (mean1) at ($(mean1c) + (0,5mm)$) {Mean pooling}; + \draw[arrow] (vjeremy) -- (vjeremy|-mean1.south); + \draw[arrow] (vben) -- (vben|-mean1.south); + \draw[arrow] (vtham) -- (vtham|-mean1.south); + + \node[draw, rectangle, anchor=south] (mean2) at ($(vlondon.north) + (0,5mm)$) {Mean pooling}; + \draw[arrow] (vlondon) -- (mean2); + + \coordinate (ftop) at ($(mean1.north) + (0,6mm)$); + \node[row vector] (re1) at (ftop-|bert) {}; + \node[row vector,right=0mm of re1] (re2) {}; + \node[row vector,left=0mm of re1] (rcls) {}; + + \coordinate (fmid) at ($(mean1.north) + (0,1.5mm)$); + \coordinate (fmidplus) at ($(mean1.north) + (0,2.5mm)$); + + \draw[arrow,rounded corners=1mm] (vcls) -- (vcls|-fmidplus) -- (rcls|-fmidplus) -- (rcls); + \draw[arrow,rounded corners=1mm] (mean1) -- (mean1|-fmid) -- (re1|-fmid) -- (re1); + \draw[arrow,rounded corners=1mm] (mean2) -- (mean2|-fmid) -- (re2|-fmid) -- (re2); +\end{tikzpicture} diff --git a/mainmatter/relation extraction/few-shot problem.tex b/mainmatter/relation extraction/few-shot problem.tex @@ -0,0 +1,13 @@ +\begin{tabular}{@{}l@{\,}l@{}} + \toprule + \multicolumn{2}{@{}l}{\strong{Query:}} \\ + & It flows into the \utail{Hörsel} in \uhead{Eisenach}. \\ + \midrule + \multicolumn{2}{@{}l}{\strong{Candidates:}} \\ + & It is remake of \utail{Hindi} film ``\uhead{Tezaab}''. \\ + & \uhead{Cynidr} was the son of St \utail{Gwladys}. \\ + \(\rightarrow\) & \uhead{Herron Island} lies in \utail{Case Inlet}. \\ + & He gained the support of \utail{Admiral} \uhead{Edward Russell}. \\ + & \uhead{\textsc{ngc} 271} is a spiral galaxy in the constellation \utail{Cetus}. 
\\ + \bottomrule +\end{tabular} diff --git a/mainmatter/relation extraction/ie steps.tex b/mainmatter/relation extraction/ie steps.tex @@ -0,0 +1,27 @@ +\begin{tikzpicture} + \node[inner sep=0mm] (capital) at (0, 0) {\ is the capital of\ \null}; + \node[inner sep=0mm,left=0mm of capital] (paris) {\vphantom{capital of}Paris}; + \node[inner sep=0mm,right=0mm of capital] (france) {\vphantom{capital of}France}; + + \draw[color=Dark2-A,thick] (paris.south west) -- (paris.south east); + \draw[color=Dark2-A,thick] (france.south west) -- (france.south east); + + \node[below=3mm of paris.south] (e1) {\wdent[\color{Dark2-B}]{90}}; + \node[below=3mm of france.south] (e2) {\wdent[\color{Dark2-B}]{142}}; + \node[below=3mm of capital.south] (r) {\vphantom{\texttt{Q}}\wdrel[\color{Dark2-C}]{1376}}; + + \draw[color=Dark2-B,arrow] (paris) to (e1); + \draw[color=Dark2-B,arrow] (france) to (e2); + \draw[color=Dark2-C,arrow] (r|-capital.south) to (r); + \draw[color=Dark2-C,arrow] (e1) to (r); + \draw[color=Dark2-C,arrow] (e2) to (r); + + \node[anchor=south west,above=5mm of paris.north west,color=Dark2-A,draw,circle,inner sep=0.2mm] (s1) {1}; + \node[anchor=west,right=0mm of s1.east,color=Dark2-A,align=left] {\vphantom{chunkin}\\Entit\smash{y}\\chunkin\smash{g}}; + + \node[anchor=north west,below=0.5mm of e1.south west,color=Dark2-B,draw,circle,inner sep=0.2mm] (s2) {2}; + \node[anchor=west,right=0mm of s2.east,color=Dark2-B,align=left] {\vphantom{linkin}\\Entit\smash{y}\\linkin\smash{g}}; + + \node[anchor=north east,below=0.5mm of r.south east,color=Dark2-C,draw,circle,inner sep=0.2mm] (s3) {3}; + \node[anchor=west,right=0mm of s3.east,color=Dark2-C,align=left] {\vphantom{extraction}\\Relation\\extraction}; +\end{tikzpicture} diff --git a/mainmatter/relation extraction/introduction.tex b/mainmatter/relation extraction/introduction.tex @@ -0,0 +1,63 @@ +The rapid increase in the amount of published information brings forward the problem of how to handle large amounts of data. +To this goal, \emph{information extraction} aims at discovering the underlying semantic structure of texts. +As such, it is considered to be a part of natural language understanding. +It is the link from unstructured text to structured data. +Following Section~\ref{sec:context:knowledge base}, we will use knowledge bases as a formalization of structured data. +However, to encompass the notion of information more appropriately, the concept of knowledge base needs to be taken in a broad sense. +The strict definition of knowledge underlying most knowledge bases only includes general facts and does not encompass things such as ``Seneca is contemptuous even of the best garum.'' +However, this sentence conveys a piece of information that needs to be considered by information extraction systems. +As such, we will consider text-specific facts such as ``Seneca \textsl{dislikes} garum'' to be facts belonging in a knowledge base. + +In this thesis, we focus on relation extraction, a subtask of information extraction. +\begin{marginparagraph} + In contrast to relation extraction, when filling a template about an entity, the template has a fixed number of fields to be filled, in the language of Section~\ref{sec:context:relation algebra}, this means that all relations are left-total: \(r\relationComposition\breve{r}=r\relationComposition\breve{r}\relationOr\relationIdentity\). +\end{marginparagraph} +Precursors of relation extraction were the template filling tasks. 
+In these tasks, objects corresponding to a given class---usually a specific kind of event---must be extracted from a text, and a template must be filled with information about this object. +This was pioneered by \textcitex{syntactic_formatting} but started gathering interest with the message understanding conferences (\textsc{muc}) supported by \textsc{darpa}.% +\sidenote{The Defense Advanced Research Projects Agency, a research agency of the \textsc{usa} Department of Defense.} +The template filling task was formalized and evaluated in a systematic way starting with \textsc{muc-2}% +\sidenote{At the time, the conference was known as \textsc{muck-ii}.} +in 1989. +But it was not until 1997 that \textsc{muc-7} formalized the modern relation extraction task. +The \textsc{muc}s were succeeded by the automatic content extraction (\textsc{ace}) program convened by the \textsc{nist}% +\sidenote{The National Institute of Standards and Technology, an agency of the \textsc{usa} Department of Commerce.} +starting in 1999. + +The main information extraction task is known as \emph{knowledge base population} and consists in generating knowledge base facts from a set of documents. +This task can be broken down into several steps, as illustrated by Figure~\ref{fig:relation extraction:ie steps}: +\begin{description} + \item[Entity chunking] seeks to locate entities in text. + A similar task is named entity recognition (\textsc{ner}), which not only locates the entities but also assigns them a type such as ``organization,'' ``person,'' ``location,'' etc. + The relation extraction datasets we consider in subsequent chapters do not include this entity-type information. + However, \textsc{ner} was more prevalent in relation extraction works during the 2000s. + + \item[Entity linking] assigns a knowledge base entity identifier to a tagged entity in a sentence. + This disambiguates ``Paris, France'' \wdent{90} from ``Paris, son of Priam, king of Troy'' \wdent{167646} and ``Paris, genus of the true lover's knot plant'' \wdent{162121}. + Following the above discussion on our broad sense of knowledge, an entity may not necessarily appear in an existing knowledge base, in which case the entity identifier can be taken to be the entity's surface form. + + \item[Relation extraction] assigns a knowledge base relation identifier to an ordered pair of tagged entities in a sentence. + Paris is not only the capital of France; it is also located in France. + However, the sentence of Figure~\ref{fig:relation extraction:ie steps} does not convey the idea of location but that of capital, thus predicting ``\textsl{located in country}'' \wdrel{17} would be incorrect there. +\end{description} +\begin{marginfigure}[-60mm] + \centering + \input{mainmatter/relation extraction/ie steps.tex} + \scaption[The three standard tasks for knowledge base population.]{ + The three standard tasks for knowledge base population. + First, entity chunking locates the entities in the sentence, here ``Paris'' and ``France.'' + Second, entity linking maps each entity to a knowledge base identifier, here \wdent{90} and \wdent{142}. + Third, relation extraction finds the relation linking the two entities, here \wdrel{1376} (\textsl{capital of}). + } + \label{fig:relation extraction:ie steps} +\end{marginfigure} + +Whereas Chapter~\ref{chap:context} introduces the main tools used in relation extraction systems, the present chapter focuses on the relation extraction task itself.
+We formally define relation extraction in Section~\ref{sec:relation extraction:definition} and introduce its main variants encountered in the literature. +A fundamental problem of relation extraction models is how to obtain supervision. +Hand labeling a dataset is tedious and error-prone, so several alternative supervision techniques have been considered over the years; this is the focus of Section~\ref{sec:relation extraction:supervision}. +We then introduce noteworthy supervised approaches---including weakly and semi-supervised ones---in Sections~\ref{sec:relation extraction:sentential} and~\ref{sec:relation extraction:aggregate}. +As we will see in Section~\ref{sec:relation extraction:definition}, the task can be tackled at the sentence level or at a higher level. +Section~\ref{sec:relation extraction:sentential} introduces sentence-level models, while Section~\ref{sec:relation extraction:aggregate} introduces higher-level models. +Lastly, we delve into the main subject of this thesis, unsupervised relation extraction, in Section~\ref{sec:relation extraction:unsupervised}. +Each of these sections is generally ordered following historical development, with older methods appearing first and current state-of-the-art appearing last. diff --git a/mainmatter/relation extraction/label propagation.tex @@ -0,0 +1,25 @@ +\begin{algorithmic} + \Function{Label Propagation}{} + \FunctionInputs{} \(\dataSet_\relationSet\) labeled dataset + \FunctionInputs*{} \(\dataSet\) unlabeled dataset + \FunctionOutput{} \(\vctr{\hat{r}}\) relation predictions + \State + \LComment{Initialization} + \State \(\mtrx{T}\gets \text{computed using Equation~\ref{eq:relation extraction:label propagation transition}}\) + \State \hphantom{\(T\gets\)} from \(\dataSet_\relationSet\) and \(\dataSet\) + \State \(\mtrx{Y} \gets \text{random stochastic matrix}\) + \ForAll{\((s_i, \vctr{e}_i, r_i)\in\dataSet_\relationSet\)} + \State \(y_{ij} \gets \delta_{j,r_i}\) + \EndFor + \LComment{Training} + \Loop + \State \(\mtrx{Y} \gets \mtrx{T}\mtrx{Y}\) + \ForAll{\((s_i, \vctr{e}_i, r_i)\in\dataSet_\relationSet\)} + \State \(y_{ij} \gets \delta_{j,r_i}\) + \EndFor + \EndLoop + \vspace{1mm} + \State \(\hat{r}_i \gets \argmax_j y_{ij}\) + \State \Output \(\vctr{\hat{r}}\) + \EndFunction +\end{algorithmic} diff --git a/mainmatter/relation extraction/marcheggiani plate.tex @@ -0,0 +1,16 @@ +\begin{tikzpicture}[node distance=7mm] + \node[pdiag observed] (e) {\(\rndmvctr{e}\)}; + \node[pdiag latent, above=8mm of e] (r) {\(\rndm{r}\)}; + \node[pdiag observed, left=of r] (s) {\(\rndm{s}\)}; + \node[inner sep=1mm, above=5mm of s] (phi) {\(\vctr{\phi}\)}; + \node[inner sep=1mm, right=of e] (theta) {\(\vctr{\theta}\)}; + \draw[arrow] (r) -- (e); + \draw[arrow] (theta) -- (e); + \draw[arrow, dashed] (e) to[out=135, in=-135] (r); + \draw[arrow, dashed] (s) -- (r); + \draw[arrow, dashed] (phi) -- (r); + + \coordinate (plspace) at ($(e.east) + (3mm, 0)$); + \node[pdiag plate, inner sep=1mm, fit=(e) (s) (r) (plspace)] (p) {}; + \node[anchor=south west] at (p.south west) {\(|\dataSet|\)}; +\end{tikzpicture} diff --git a/mainmatter/relation extraction/miml setup.tex @@ -0,0 +1,18 @@ +\begin{tikzpicture}[node distance=1mm] + \node[align=center] (e) {entity\\pair}; + + \node[right=15mm of e] (rd) {\strut\(\ldots\)}; + \node[above=of rd] (r1) {\strut\(r_1\)}; + \node[below=of rd] (rm)
{\strut\(r_m\)}; + + \node[left=15mm of e] (id) {\strut\(\ldots\)}; + \node[above=of id] (i2) {\strut\(i_2\)}; + \node[above=of i2] (i1) {\strut\(i_1\)}; + \node[below=of id] (in) {\strut\(i_n\)}; + + \draw[arrow] (e) -- (r1); + \draw[arrow] (e) -- (i1); + \draw[arrow] (e) -- (i2); + \draw[arrow] (e) -- (rm) node[sloped, midway, below] {linked by}; + \draw[arrow] (e) -- (in) node[sloped, midway, below] {appears in}; +\end{tikzpicture} diff --git a/mainmatter/relation extraction/multir plate.tex b/mainmatter/relation extraction/multir plate.tex @@ -0,0 +1,20 @@ +\begin{tikzpicture} + \node[pdiag latent] (z) {\(\symup{z}\)}; + \node[pdiag observed, left=6mm of z] (x) {\(\symup{x}\)}; + \node[pdiag observed, right=6mm of z] (y) {\(\symup{y}\)}; + + \draw (z) -- (y) node[midway, pdiag factor] {}; + \draw (z) -- (x) node[midway, pdiag factor] {}; + + \coordinate (p1lspace) at ($(y.south) + (0,-4mm)$); + \node[pdiag plate, fit=(y) (p1lspace)] (p1) {}; + \node[anchor=south west] at (p1.south west) {\(\relationSet\)}; + + \coordinate (p2lspace) at ($(z.south) + (0,-4mm)$); + \node[pdiag plate, fit=(z) (x) (p2lspace)] (p2) {}; + \node[anchor=south west] at (p2.south west) {\(\sentenceSet\divslash\entitySet^2\)}; + + \coordinate (p3lspace) at ($(p1.north) + (0,5mm)$); + \node[pdiag plate, fit=(p1) (p2) (p3lspace)] (p3) {}; + \node[anchor=north west] at (p3.north west) {\(\entitySet^2\)}; +\end{tikzpicture} diff --git a/mainmatter/relation extraction/multir.tex b/mainmatter/relation extraction/multir.tex @@ -0,0 +1,18 @@ +\begin{algorithmic} + \Function{MultiR}{} + \FunctionInput{} \(\dataSet_\relationSet^\vctr{e}\) \rlap{a supervised multi-instance dataset} + \FunctionOutput{} \(\vctr{\theta}\) model parameters + \State + \State \(\vctr{\theta} \gets \mtrx{0}\) + \Loop + \ForAll{\((\vctr{x}_i, \vctr{y}_i)\in\dataSet^\vctr{e}_\relationSet\)} + \State \(\displaystyle(\vctr{y}', \vctr{z}') \gets \argmax_{\vctr{y}, \vctr{z}} P(\vctr{y}, \vctr{z}\mid \vctr{x}_i; \vctr{\theta})\) + \If{\(\vctr{y}'\neq \vctr{y}_i\)} + \State \(\displaystyle\vctr{z}^*\gets \argmax_{\vctr{z}} P(\vctr{z}\mid \vctr{x}_i, \vctr{y}_i; \vctr{\theta})\) + \State \(\mtrx{\theta}\gets\mtrx{\theta} + \vctr{\phi}(\vctr{x}_i, \vctr{z}^*) - \vctr{\phi}(\vctr{x}_i, \vctr{z}') \) + \EndIf + \EndFor + \EndLoop + \State \Output \(\vctr{\theta}\) + \EndFunction +\end{algorithmic} diff --git a/mainmatter/relation extraction/pcnn.tex b/mainmatter/relation extraction/pcnn.tex @@ -0,0 +1,75 @@ +\begin{tikzpicture}[ + word/.style={text depth=0,anchor=center,inner sep=0}, + flow/.style={thin,-latex,color=black!80}, + square/.style={regular polygon,regular polygon sides=4, inner sep=0}, + sidebrace/.style={decorate,decoration={brace,amplitude=0.3em,mirror}}, + sidenote/.style={left,midway,xshift=-0.4em,text depth=0}, + transform/.style={rotate=-90,draw,align=center,scale=0.7,rounded corners=0.1cm}] + + \foreach \i/\da/\db/\name/\text in { 0/-2/-8/x00/Founded, + 1/-1/-7/x01/in, + 2/ / /e1/\strong{Rome}, + 3/ 1/-5/x10/{(}, + 4/ 2/-4/x11/then, + 5/ 3/-3/x12/capital, + 6/ 4/-2/x13/of, + 7/ 5/-1/x14/the, + 8/ / /e2/\strong{Papal States}, + 9/ 7/ 1/x20/{)}, + 10/ 8/ 2/x21/in, + 11/ 9/ 3/x22/1575, + 12/10/ 4/x23/by, + 13/11/ 5/x24/St, + 14/12/ 6/x25/Philip, + 15/13/ 7/x26/\ldots}{ + \node[word] (t\name) at (-0.3, -0.3*\i+0.4*0.3) {\footnotesize \text}; + \node at (2.4, -0.3*\i+0.15) {\scriptsize \da}; + \node at (2.8, -0.3*\i+0.15) {\scriptsize \db}; + \node[draw,minimum width=20mm,minimum height=0.3cm,anchor=north west] (\name) at (1, -0.3*\i+0.3) {}; + 
} + + \draw (2.6, 0.3) -- (2.6, -4.5); + \draw (2.2, 0.3) -- (2.2, -4.5); + + \draw[flow] (x00.east) -- (4, -0.55); + \draw[flow] (x01.east) -- (4, -0.65); + \draw[flow] (x10.east) -- (4, -1.95); + \draw[flow] (x11.east) -- (4, -2.05); + \draw[flow] (x12.east) -- (4, -2.15); + \draw[flow] (x13.east) -- (4, -2.25); + \draw[flow] (x14.east) -- (4, -2.35); + \draw[flow] (x20.east) -- (4, -3.3); + \draw[flow] (x21.east) -- (4, -3.4); + \draw[flow] (x22.east) -- (4, -3.5); + \draw[flow] (x23.east) -- (4, -3.6); + \draw[flow] (x24.east) -- (4, -3.7); + \draw[flow] (x25.east) -- (4, -3.8); + \draw[flow] (x26.east) -- (4, -3.9); + + \draw[pattern=north east lines,pattern color=Dark2-C,draw=none] (e1.north west) rectangle (e1.south east); + \draw[pattern=north east lines,pattern color=Dark2-C,draw=none] (e2.north west) rectangle (e2.south east); + + \draw [sidebrace] (tx00.north -| -1.4,0) -- (tx01.south -| -1.4,0) node [sidenote] {prefix}; + \draw [sidebrace] (tx10.north -| -1.4,0) -- (tx14.south -| -1.4,0) node [sidenote] {infix}; + \draw [sidebrace] (tx20.north -| -1.4,0) -- (tx26.south -| -1.4,0) node [sidenote] {suffix}; + + \node[draw,square,fill=black!20,rounded corners=0.1cm] (linear) at (8, -2) {Linear}; + \node[transform,minimum width=1.4cm/0.7,anchor=north] (softmax) at (9.3, -2) {softmax}; + + \node[right=2mm of softmax.north] {\(P(\rndm{r}\mid s)\)}; + + \draw [sidebrace] (2.25, -4.6) -- (3, -4.6) node [below,midway,yshift=-1mm,xshift=-3mm,align=left,text width=4cm,anchor=north west] {positional\\embeddings}; + \draw [sidebrace] (1, -4.6) -- (2.15, -4.6) node [below,midway,yshift=-1mm,xshift=3mm,align=right,text width=4cm,anchor=north east] {\hfill word\\\hfill embeddings}; + + \foreach \i/\n in {-1/p,-2.5/i,-4/s}{ + \foreach \j/\c in {0.3/20,0.2/40,0.1/20,0/40}{ + \filldraw[fill=black!\c,fill opacity=0.7,rounded corners=0.05cm] (4+\j, \i+\j) rectangle +(0.8, 0.8); + } + \node at (4+0.4, \i+0.4) {\footnotesize Conv}; + + \node[transform,minimum width=1.1cm/0.7,anchor=north east] at (6, \i) {max\\pooling}; + \node[transform,minimum width=1.1cm/0.7,anchor=north east] (relu\n) at (6.5, \i) {tanh}; + + \draw[flow] (relu\n) -- (linear); + } +\end{tikzpicture} diff --git a/mainmatter/relation extraction/pullback.tex b/mainmatter/relation extraction/pullback.tex @@ -0,0 +1,10 @@ +\begin{tikzpicture} + \node (d) {\(\dataSet\)}; + \node[below=of d] (e) {\(\entitySet^2\)}; + \node[right=of d] (s) {\(\sentenceSet\)}; + \node (r) at (e-|s) {\(\relationSet\)}; + \draw[arrow] (d) -- (e); + \draw[arrow] (d) -- (s); + \draw[arrow] (e) -- (r); + \draw[arrow] (s) -- (r); +\end{tikzpicture} diff --git a/mainmatter/relation extraction/rellda plate.tex b/mainmatter/relation extraction/rellda plate.tex @@ -0,0 +1,33 @@ +\kern1mm% +\begin{tikzpicture}[node distance=5mm] + \node[pdiag observed, opacity=0] (ff) {\(\rndm{f}\)}; + \node[pdiag latent, above=of ff] (r) {\(\rndm{r}_i\)}; + \node (fdots) {\(\cdots\)}; + \node[pdiag observed, left=1mm of ff] (f1) {\(\rndm{f}_{ij}\)}; + \node[pdiag observed, right=1mm of ff] (f2) {\(\rndm{f}_{ik}\)}; + \draw[arrow] (r) -- (f1); + \draw[arrow] (r) -- (f2); + + \coordinate (p1lspace) at ($(f2.south) + (0,-2mm)$); + \node[pdiag plate, inner sep=2.5mm, fit=(r) (f1) (f2) (p1lspace)] (p1) {}; + + \node[pdiag latent, above=of r] (theta) {\(\rndm{\theta}_d\)}; + \coordinate (p2lspace) at ($(p1.south) + (0,-5mm)$); + \node[pdiag plate, fit=(p1) (theta) (p2lspace)] (p2) {}; + \draw[arrow] (theta) -- (r); + + \node[pdiag latent, right=12mm of theta] (alpha) {\(\alpha\)}; + 
\draw[arrow] (alpha) -- (theta); + + \node[pdiag latent, left=15mm of ff] (phi) {\(\rndm{\phi}_{rj}\)}; + \coordinate (p3lspace) at ($(phi.south) + (0,-2mm)$); + \node[pdiag plate, inner sep=2.5mm, fit=(phi) (p3lspace)] (p3) {}; + \draw[arrow] (phi) -- (f1); + + \node[pdiag latent, above=of phi] (beta) {\(\beta\)}; + \draw[arrow] (beta) -- (phi); + + \node[anchor=south east] at (p1.south east) {\(n_d\)}; + \node[anchor=south east] at (p2.south east) {\(D\)}; + \node[anchor=south east] at (p3.south east) {\(|\relationSet|\)}; +\end{tikzpicture} diff --git a/mainmatter/relation extraction/rellda.tex b/mainmatter/relation extraction/rellda.tex @@ -0,0 +1,23 @@ +\begin{algorithmic} + \Function{Rel-\textsc{lda} Generation}{} + \FunctionInputs{} \(\alpha\) relations hyperprior + \FunctionInputs*{} \(\beta\) features hyperprior + \FunctionOutput{} \(\mtrx{F}\) observed features + \State + \ForAll{relations \(r\)} + \ForAll{features \(j\)} + \State Choose \(\phi_{rj}\sim\operatorname{Dir}(\beta)\) + \EndFor + \EndFor + \ForAll{documents \(d\)} + \State Choose \(\theta_d\sim\operatorname{Dir}(\alpha)\) + \ForAll{samples \(i\) in \(d\)} + \State Choose \(r\sim\operatorname{Cat}(\theta_d)\) + \ForAll{features \(j\)} + \State Choose \(f_{ij}\sim\operatorname{Cat}(\phi_{rj})\) + \EndFor + \EndFor + \EndFor + \State \Output \(\mtrx{F}\) + \EndFunction +\end{algorithmic} diff --git a/mainmatter/relation extraction/selfore.tex b/mainmatter/relation extraction/selfore.tex @@ -0,0 +1,55 @@ +\begin{tikzpicture}[ + sample/.style={minimum width=1.5mm, inner sep=0mm}, + classA/.style={sample, fill=Dark2-B, circle}, + classB/.style={sample, fill=Dark2-B, regular polygon, regular polygon sides=3}, + classC/.style={sample, fill=Dark2-B, star, star points=5} + ] + \node[draw, minimum width=37mm] (bert) {\bertcoder}; + \node[below=4mm of bert] (x) {\(s\)}; + \node[above=4mm of bert] (h) {\(\vctr{h}\)}; + \node[draw, trapezium, trapezium angle=45, minimum width=2cm, anchor=south] (encoder) at ($(bert.north) + (-12mm, 1cm)$) {Encoder}; + \coordinate (zsw) at ($(encoder.top left corner) + (0, 1mm)$); + \coordinate (zne) at ($(encoder.top right corner) + (0, 11mm)$); + \node[draw, Dark2-B, inner sep=0, fit=(zsw) (zne)] (z) {}; + \draw[thin, Dark2-B, faded] (z.center) -- (z.south); + \draw[thin, Dark2-B, faded] (z.center) -- (z.30); + \draw[thin, Dark2-B, faded] (z.center) -- (z.150); + \node[classA, xshift=3mm, yshift=-2mm] at (z.center) {}; + \node[classA, xshift=4.4mm, yshift=0mm] at (z.center) {}; + \node[classA, xshift=5.2mm, yshift=-2.8mm] at (z.center) {}; + \node[classA, xshift=1.7mm, yshift=-3.4mm] at (z.center) {}; + \node[classB, xshift=-3.5mm, yshift=-1.5mm] at (z.center) {}; + \node[classB, xshift=-4.2mm, yshift=-3.4mm] at (z.center) {}; + \node[classB, xshift=-5.3mm, yshift=-2.8mm] at (z.center) {}; + \node[classB, xshift=-1.5mm, yshift=-0.2mm] at (z.center) {}; + \node[classB, xshift=-5.2mm, yshift=1mm] at (z.center) {}; + \node[classC, xshift=-1mm, yshift=2.5mm] at (z.center) {}; + \node[classC, xshift=1mm, yshift=3.5mm] at (z.center) {}; + \node[classC, xshift=-2.5mm, yshift=3.7mm] at (z.center) {}; + \node[classC, xshift=2.1mm, yshift=2.7mm] at (z.center) {}; + + \coordinate (cx) at ($(bert.north east)!0.5!(bert.north)$); + \node[draw, inner sep=0, fit=(z.north) (encoder.south) (encoder.top left corner) (encoder.top right corner), anchor=south, label=center:Classifier] (classifier) at ($(bert.north) + (12mm, 1cm)$) {}; + \coordinate (tsw) at ($(classifier.north west) + (0, 1mm)$); + \coordinate (tne) at 
($(classifier.north east) + (0, 3mm)$); + \node[draw, Dark2-B, inner sep=0, fit=(tsw) (tne)] (t) {}; + \node[classA] at ($(t.east)!0.1667!(t.west)$) {}; + \node[classB,yshift=-0.1mm] at ($(t.east)!0.5!(t.west)$) {}; + \node[classC] at ($(t.east)!0.8333!(t.west)$) {}; + \draw[thin, Dark2-B, faded] ($(t.north east)!0.3333!(t.north west)$) -- ($(t.south east)!0.3333!(t.south west)$); + \draw[thin, Dark2-B, faded] ($(t.north east)!0.6667!(t.north west)$) -- ($(t.south east)!0.6667!(t.south west)$); + + \node[draw, thick, dashed, Dark2-A, fit=(z.north) (encoder.bottom right corner) (encoder.bottom left corner)] (f0) {}; + \node[fit=(t) (classifier)] (f1) {}; + \node[fit=(bert)] (f2) {}; + \draw[thick, dashed, Dark2-C] (f1.north west) -- (f1.west|-f2.north) -- (f2.north west) -- (f2.south west) -- (f2.south east) -- (f2.east|-f1.north) -- cycle; + + \node[Dark2-A, anchor=south] at (f0.north) {\(\loss{ac}\)}; + \node[Dark2-C, anchor=south] at (f1.north) {\(\loss{rc}\)}; + + \draw[arrow] (x) -- (bert); + \draw[arrow] (bert) -- (h); + \draw[arrow, rounded corners=1mm] (h) -- (h-|encoder) -- (encoder); + \draw[arrow, rounded corners=1mm] (h) -- (h-|classifier) -- (classifier); + \draw[arrow, rounded corners=1mm] (z) -- (z-|h) -- (h|-t) -- (t); +\end{tikzpicture} diff --git a/mainmatter/relation extraction/sentential.tex b/mainmatter/relation extraction/sentential.tex @@ -0,0 +1,388 @@ +\section{Supervised Sentential Extraction Models} +\label{sec:relation extraction:sentential} +In the supervised setup, all variables listed in Table~\ref{tab:relation extraction:supervised samples} are given at train time. +During evaluation, the relation must be predicted from the other three variables: sentence, head entity and tail entity. +The predictions for each sample can then be compared to the gold standard.% +\sidenote{ + When a distant supervision dataset is used, ``gold standard'' is somewhat a misnomer. + In this case, the relation labels are often referred to as a ``silver standard'' since they are not as good as possible. +} +We introduce the commonly used metric for evaluation on a supervised dataset in Section~\ref{sec:relation extraction:supervised evaluation}. +The following sections focus on important supervised methods, including weakly-supervised and semi-supervised methods. +These sections focus on sentential relation extraction methods, which realize Equation~\ref{eq:relation extraction:sentential definition}. +In contrast, Section~\ref{sec:relation extraction:aggregate} focuses on aggregate methods, which often build upon sentential approaches. + +\subsection{Evaluation} +\label{sec:relation extraction:supervised evaluation} +Since supervised relation extraction is a standard multiclass classification task, it uses the usual \fone{} metric, with one small tweak to handle directionality. +As for training, we use samples from \(\dataSet_\relationSet\subseteq\sentenceSet\times\entitySet^2\times\relationSet\) for evaluation. +Let's call \(x\in\dataSet\subseteq\sentenceSet\times\entitySet^2\) an unlabeled sample, and \(g\colon\dataSet\to\relationSet\) the function which associates with each sample \(x\) its gold label in the dataset (as given by \(\dataSet_\relationSet\)). +Similarly, let's call \(c\colon\dataSet\to\relationSet\) the function which associates with each sample \(x\) the relation predicted by the model we are evaluating. 
+The standard \fone{} score for a relation \(r\in\relationSet\) can be defined as: +\begin{equation*} + \operatorname{precision}(g, c, r) = + \frac{|\{\,x\in\dataSet \mid c(x) = g(x) = r\,\}|}{|\{\,x\in\dataSet \mid c(x) = r\,\}|} = \frac{\text{true positive}}{\text{predicted positive}} +\end{equation*} +% Page break here +\begin{align*} + \operatorname{recall}(g, c, r) & = + \frac{|\{\,x\in\dataSet \mid c(x) = g(x) = r\,\}|}{|\{\,x\in\dataSet \mid g(x) = r\,\}|} = \frac{\text{true positive}}{\text{labeled positive}}\\ + \fone(g, c, r) & = + \frac{2}{\operatorname{precision}(g, c, r)^{-1}+\operatorname{recall}(g, c, r)^{-1}}. +\end{align*} + +To aggregate these scores into a single number, multiple approaches are possible. +First of all, micro-averaging: the true positive, predicted positive and labeled positive counts are summed over all relations before computing the metrics. +In the case where all samples have one and only one label and prediction, micro-precision, micro-recall and micro-\fone{} collapse into the same value, namely the accuracy. +However, when computing a micro-metric on a dataset containing the \textsl{other} relation (Section~\ref{sec:relation extraction:other}), the samples labeled \textsl{other} are ignored, making the difference between micro-precision and micro-recall relevant again. + +The second set of approaches uses macro-averaging, which means that the scores are first computed for each relation separately before taking the average of these per-relation scores over the set of relations. +This compensates for the class imbalance in the dataset since, when averaging the per-relation scores, the score for a rare class is weighted the same as the score for a frequent class. +The ``directed'' macro-scores are defined as usual: +\begin{align*} + \overDirected{\operatorname{precision}}(g, c) & = \frac{1}{|\relationSet|} \sum_{r\in\relationSet} \operatorname{precision}(g, c, r) \\ + \overDirected{\operatorname{recall}}(g, c) & = \frac{1}{|\relationSet|} \sum_{r\in\relationSet} \operatorname{recall}(g, c, r) \\ + \overDirected{\fone}(g, c) & = \frac{1}{|\relationSet|} \sum_{r\in\relationSet} \fone(g, c, r). +\end{align*} +However, two other variants exist. +These variants try to discard the orientation of the relationship by packing together a relation \(r\) with its converse \(\breve{r}\). +This allows us to evaluate separately the ability of the model to find the correct relation and to find which entity is the subject (\(e_1\)) and which is the object (\(e_2\)). +The simplest way to achieve this is to ignore the orientation: +\begin{equation*} + \overUndirected{\operatorname{precision}}(g, c) = + \frac{1}{|\relationSet^\dagger|} \sum_{\{r, \breve{r}\}\in\relationSet^\dagger} + \frac{\big|\big\{\,x\in\dataSet \mid c(x), g(x) \in \{r, \breve{r}\}\,\big\}\big|} + {\big|\big\{\,x\in\dataSet \mid c(x) \in \{r, \breve{r}\}\,\big\}\big|}, +\end{equation*} +where \(\relationSet^\dagger\) is the set of relations paired by ignoring directionality. +The set \(\relationSet^\dagger\) is well defined, since for the datasets using this metric, \(\relationSet\) is closed under the converse operation \(\breve{\ }\) with the notable exception of \textsl{other}. +However, similarly to micro-metrics, \textsl{other} is often ignored altogether. +It only influences the final metrics through the degradation of recall on samples mispredicted as \textsl{other} and of precision on samples mispredicted as not \textsl{other}.
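+\begin{marginparagraph} + To make the difference concrete, here is a toy computation with made-up counts: suppose 10 samples are predicted as \(r\) (6 with gold label \(r\), 2 with \(\breve{r}\), 2 with an unrelated relation) and 10 as \(\breve{r}\) (1 with gold label \(r\), 5 with \(\breve{r}\), 4 with an unrelated relation). + Then \(\operatorname{precision}(g, c, r) = 6/10\) and \(\operatorname{precision}(g, c, \breve{r}) = 5/10\), while the undirected precision of the pair \(\{r, \breve{r}\}\) is \(14/20\), since ignoring the orientation also credits the three samples predicted with the wrong orientation. +\end{marginparagraph}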
+Following the definitions above, we can similarly define \(\overUndirected{\operatorname{recall}}\) and \(\overUndirected{\fone}\). + +Finally, as a compromise between the directed \(\overDirected{\fone}\) and undirected \(\overUndirected{\fone}\), the half-directed metric was designed: +\begin{equation*} + \overHalfdirected{\operatorname{precision}}(g, c) = + \frac{1}{|\relationSet^\dagger|} \sum_{\{r, \breve{r}\}\in\relationSet^\dagger} + \frac{\big|\big\{\,x\in\dataSet \mid g(x) \in \{r, \breve{r}\} \land c(x) = g(x)\,\big\}\big|} + {\big|\big\{\,x\in\dataSet \mid c(x) \in \{r, \breve{r}\}\,\big\}\big|}. +\end{equation*} +The key difference from the undirected metric is that while the prediction and gold must still be equal to \(r\) or \(\breve{r}\), they furthermore need to be equal to each other. +Figure~\ref{fig:relation extraction:supervised metrics} gives a visual explanation using the confusion matrix. +Note that the distinction between directed and undirected metrics can also apply to micro-metrics. +\begin{marginfigure} + \centering + \input{mainmatter/relation extraction/supervised metrics.tex} + \scaption[Supervised metrics defined on the confusion matrix.]{ + Supervised metrics defined on the confusion matrix. + Directed metrics consider green and blue to be different classes: the \(\overDirected{\operatorname{recall}}\) for the relation \(r\) is computed by dividing the number of samples in the dark green cell by the total number of samples in the green row. + Undirected metrics consider green and blue to be the same class: the \(\overUndirected{\operatorname{recall}}\) for this class is computed by summing the four cells in the center, including the two hatched ones, and dividing by the sum of the two rows. + Half-directed metrics also consider \(\{r,\breve{r}\}\) to form a single class, but the \(\overHalfdirected{\operatorname{recall}}\) is computed by summing the two dark cells in the center---ignoring the two hatched ones---and dividing by the sum of the two rows. + \label{fig:relation extraction:supervised metrics} + } +\end{marginfigure} + +In conclusion, the evaluation of supervised approaches varies along three axes: +\begin{itemize} + \item Whether \textsl{other} is considered a normal relation or is only taken into account through degraded precision and recall on the other classes. + \item Whether the directionality of relations is taken into account. + \item Whether class imbalance is corrected through macro-aggregation. +\end{itemize} + +We now describe supervised relation extraction models, starting in this section with sentential approaches. + +\subsection{Regular Expressions: \textsc{dipre}} +\label{sec:relation extraction:dipre} +Dual Iterative Pattern Relation Expansion (\textsc{dipre}, \citex{dipre}) follows the bootstrap approaches (Section~\ref{sec:relation extraction:bootstrap}) and thus assumes \hypothesis{pullback}. +Compared to \textcite{hearst_hyponyms}, \textsc{dipre} proposes a simple automation for the \(\relationSet_\entitySet\times \dataSet\to \relationSet_\sentenceSet\) step---the extraction of new patterns---and applies it to the extraction of the ``\textsl{author of book}'' relation. +To facilitate this automation and in contrast to \textcite{hearst_hyponyms}, it limits itself to two entities per pattern. +\textsc{dipre} introduces the split-in-three-affixes technique illustrated by Figure~\ref{fig:relation extraction:dipre split}.
+\begin{marginfigure} + \centering + \input{mainmatter/relation extraction/dipre split.tex} + \scaption[\textsc{dipre} split-in-three-affixes method.]{ + \textsc{dipre} split-in-three-affixes method. + The algorithm ran on \textsc{html} code: \texttt{<li>} marks a list item, while \texttt{<b></b>} surrounds bold text. + \label{fig:relation extraction:dipre split} + } +\end{marginfigure} +The entities split the text into three parts: the prefix before the first entity, the infix between the two entities and the suffix after the second entity. +This could be considered a split into five parts if we count the two entities' surface forms, since they are not part of any of the three affixes. +This split has reappeared in other works since, with the simplest methods assuming that the infix alone conveys the relation. +Even in the case of \textsc{dipre}, all three affixes are considered, but the infix needed to match exactly, while the prefix and suffix could be shortened in order to make a pattern more general. +All patterns are specific to an \textsc{url} prefix, which made the algorithm pick up quickly on lists of books; the algorithm also handled patterns where the author appeared before the title using a simple boolean marker. + +In order to generate new patterns, \textsc{dipre} takes all occurrences with the same infix and with the title and author in the same order. +To avoid patterns which are too general, they use the following approximation of the specificity of a pattern: +\begin{align*} + \operatorname{specificity}(\text{pattern}) & = -\log( P(\text{pattern matches})) \\ + & \approx \text{total length of the affixes}. +\end{align*} +When this specificity was lower than a given threshold divided by the number of known books it matched, the pattern was rejected. +In the experiment, the algorithm was run on a starting set of five \((\text{author}, \text{title})\) facts, which generated three patterns, one of which is given in Figure~\ref{fig:relation extraction:dipre split}; these patterns produced in turn 4\,047 facts. +As per \textcite{hearst_hyponyms}, the algorithm was then iterated once again on these new facts. +The second iteration introduced bogus facts, which were removed manually. +Finally, the third iteration produced a total of 15\,257 \textsl{author of book} facts. +\Textcite{dipre} manually analyzed twenty books out of these 15\,257 and found that only one of them was not a book but an article, while four of them were obscure enough not to appear in the list of a major bookseller. + +A limitation of the bootstrap approaches assuming \hypothesis{pullback} is that this assumption naively entails the following: +\begin{marginparagraph} + As a reminder from Section~\ref{sec:context:relation algebra}: \(\relationZero\) denotes the empty relation linking no entities together. + So \(r_1\relationAnd r_2 = \relationZero\) should be understood as ``if we take the relation linking together all the entity pairs connected at the same time (\(\relationAnd\)) by \(r_1\) and \(r_2\), we should obtain the relation linking no entities together (\(\relationZero\)).'' +\end{marginparagraph} +\begin{assumption}[oneadjacency]{1-adjacency} + There is no more than one relation linking any two entities. + + \smallskip + \noindent + \( \forall r_1, r_2\in\relationSet\colon r_1\relationAnd r_2 = \relationZero \) +\end{assumption} +Indeed, if a pair of entities is linked by two relations, this would imply that a sentence containing these two entities also conveys the two relations.
+By induction it follows that the two relations would actually be the same. + +The approach of \textsc{dipre} was subsequently used by other systems such as Snowball \parencite{snowball}, which uses more complex matching and pattern generation algorithms and formalizes the experimental setup. +We now focus on another semi-supervised approach similar to bootstrap, which was important to the development of relation extraction methods. + +\subsection{Dependency Trees: \textsc{dirt}} +\label{sec:relation extraction:dirt} +Discovery of Inference Rules from Text (\textsc{dirt}, \citex{dirt}) also uses the \hypothesis{pullback} assumption but makes a single iteration of the bootstrap algorithm from a single example. +Furthermore, \textsc{dirt} makes the pattern building \(\relationSet_\entitySet\times \dataSet\to \relationSet_\sentenceSet\) more resilient to noise and applies the algorithm to multiple relations. +Another difference is that it factorizes the definition of \(\relationSet_\sentenceSet\) using dependency paths instead of regular expressions. +Given a sentence, a dependency parser can create a tree where nodes are built from words, and the arcs between the nodes correspond to the grammatical relationship between the words. +This is called a dependency tree and is exemplified by Figure~\ref{fig:relation extraction:dependency tree}. +\begin{marginfigure} + \centering + \input{mainmatter/relation extraction/dependency tree.tex} + \scaption[Example of dependency tree.]{ + Example of dependency tree given by \textcite{dirt} generated using the Minipar dependency parser. + The nodes correspond to words in the sentence, as indicated by the dashed line. + Each node is tagged by the part-of-speech (\textsc{pos}) of the associated word. + The arrows between the nodes are labeled with the dependency between the words. + The following abbreviations are used: \texttt{N} is noun, \texttt{V} is verb, \texttt{Det} is determiner, \texttt{subj} is subject, \texttt{obj} is object, and \texttt{det} is the determiner relation. + \label{fig:relation extraction:dependency tree} + } +\end{marginfigure}% +\begin{epigraph} + {Groucho Marx} + {Animal Crackers} + {1930}[The ambiguity of the prepositional phrase ``in my pajamas'' would be removed by a dependency tree. It can either be linked to the noun ``elephant'' or to the verb ``shot.''] + While hunting in Africa, I shot an elephant in my pajamas. How he got into my pajamas, I don't know. +\end{epigraph} +After building a dependency tree, we can take the path between two nodes in the tree, for example the path between ``John'' and ``problem'' in the tree of Figure~\ref{fig:relation extraction:dependency tree} is: +\begin{indentedexample} + \(\leftarrow\)\texttt{N:subj:V}\(\leftarrow\)% +find% + \(\rightarrow\)\texttt{V:obj:N}\(\rightarrow\)% +solution% + \(\rightarrow\)\texttt{N:to:N}\(\rightarrow\)% +\end{indentedexample} +Note that lemmatization is performed on the nodes. +\Textcite{dirt} state their assumption as an extension of the distributional hypothesis (see section~\ref{sec:context:history}): +\begin{spacedblock} + \strong{Distributional Hypothesis on Dependency Paths:} + \emph{If two dependency paths occur in similar contexts, they tend to convey similar meanings.} +\end{spacedblock} +In the case of \textsc{dirt}, context is defined as the two endpoints of the paths. 
+For example, in the tree of Figure~\ref{fig:relation extraction:dependency tree}, the context of the path given above consists of the words ``John'' and ``problem.'' +As such, this can be seen as a probabilistic version of the \(\relationSet_\entitySet\times\dataSet\to\relationSet_\sentenceSet\) step. +In order to ensure these paths correspond to meaningful relations, only paths between nouns are considered. +For example, by counting all entities appearing at the endpoints of the path above, \textcite{dirt} observe that the following path has similar endpoints: +\begin{indentedexample} + \(\leftarrow\)\texttt{N:subj:V}\(\leftarrow\)% +solve% + \(\rightarrow\)\texttt{V:obj:N}\(\rightarrow\)% +\end{indentedexample} +Therefore, they can conclude that these two paths correspond to the same relation. +The orientation of a path is not essential. +If the subject of ``solve'' appears after its object in a sentence, we still want this path to be counted the same as the one above. +As introduced in Section~\ref{sec:relation extraction:directionality}, this is a common problem in relation extraction. +To solve this in a relatively straightforward manner, we simply assume all paths come in the two possible orientations, so for each sentence, the extracted path and its reverse are added to the dataset. +We use a mutual information-based measure to evaluate how similar two sets of endpoints are. +Since counting all possible pairs would be too memory intensive---the squared size of the vocabulary \(|V|^2\) is usually on the order of billions or more---we measure the similarity of the first and second endpoint separately. +To measure the preference of the dependency path \(\pi\) to have the word \(w\in V\) appear at the endpoint \(\ell\in\{\leftarrow, \rightarrow\}\), the following conditional pointwise mutual information is used:% +\begin{marginparagraph} + The similarity metric equations in \textcite{dirt} are quite informal. + In particular, they do not state that \(\ell\) has a special role as a conditional variable in the \(\pmi\) and erroneously designate the same value as \(\operatorname{mi}(\pi, m, \ell)\). + The equations given here are our own. +\end{marginparagraph} +\begin{align*} + \pmi(\pi, w\mid \ell) & = \log \frac{P(\pi, w \mid \ell)}{P(\pi\mid \ell)P(w\mid \ell)} \\ + & = \log \frac{P(\pi, \ell, w)P(\ell)}{P(\pi,\ell)P(\ell,w)}. +\end{align*} +This quantity can be computed empirically using a hash table counting how many times the triplet \((\pi, \ell, w)\) appears in the dataset. +We can then compute the similarity between two paths given an endpoint \(\ell\), and take the geometric average over the two possible values of \(\ell\) to obtain an unconditioned similarity between paths: +\begin{equation*} + \operatorname{sim}(\pi_1, \pi_2, \ell) = \frac + {\sum_{w\in C(\pi_1, \ell)\cap C(\pi_2, \ell)} \big( \pmi(\pi_1, w\mid \ell) + \pmi(\pi_2, w\mid \ell) \big)} + {\sum_{w\in C(\pi_1, \ell)} \pmi(\pi_1, w\mid \ell) + \sum_{w\in C(\pi_2, \ell)} \pmi(\pi_2, w\mid \ell)} +\end{equation*} +\begin{equation*} + \operatorname{sim}(\pi_1, \pi_2) = + \sqrt{\operatorname{sim}(\pi_1, \pi_2, \leftarrow) + \times \operatorname{sim}(\pi_1, \pi_2, \rightarrow)}, +\end{equation*} +where \(C(\pi, \ell)\) designates the context, that is the set of words appearing at the endpoint \(\ell\) of the path \(\pi\). + +Using this similarity function, \textcite{dirt} can find sets of paths corresponding to particular relations by looking at frequent paths above a fixed similarity threshold.
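As an aside, here is a minimal Python sketch of the path-similarity computation defined above, assuming the (path, endpoint, word) triplet counts have already been extracted from a dependency-parsed corpus and stored in a dictionary; all names are ours and purely illustrative, this is not the actual implementation of \textsc{dirt}.

import math

def make_path_similarity(counts):
    # counts: dict mapping (path, slot, word) to a frequency, with slot in {"<-", "->"}.
    # Marginal counts used to estimate P(ell), P(pi, ell) and P(ell, w).
    total, path_slot, slot_word, contexts = {}, {}, {}, {}
    for (pi, slot, w), n in counts.items():
        total[slot] = total.get(slot, 0) + n
        path_slot[pi, slot] = path_slot.get((pi, slot), 0) + n
        slot_word[slot, w] = slot_word.get((slot, w), 0) + n
        contexts.setdefault((pi, slot), set()).add(w)

    def pmi(pi, w, slot):
        # log[ c(pi, ell, w) * c(ell) / (c(pi, ell) * c(ell, w)) ]
        joint = counts.get((pi, slot, w), 0)
        if joint == 0:
            return 0.0
        return math.log(joint * total[slot] / (path_slot[pi, slot] * slot_word[slot, w]))

    def sim_slot(p1, p2, slot):
        shared = contexts.get((p1, slot), set()) & contexts.get((p2, slot), set())
        num = sum(pmi(p1, w, slot) + pmi(p2, w, slot) for w in shared)
        den = (sum(pmi(p1, w, slot) for w in contexts.get((p1, slot), set()))
               + sum(pmi(p2, w, slot) for w in contexts.get((p2, slot), set())))
        return num / den if den else 0.0

    def sim(p1, p2):
        # geometric mean over the two endpoints
        product = sim_slot(p1, p2, "<-") * sim_slot(p1, p2, "->")
        return math.sqrt(product) if product > 0 else 0.0

    return sim

With such a function, two paths like ``X finds a solution to Y'' and ``X solves Y'' come out as similar whenever their endpoints share many high-PMI words, which is exactly the criterion described above.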
+They evaluate their method manually on a question answering dataset. +For each question, they extract the corresponding path, look at the 40 most similar paths in their dataset, and manually tag whether these paths would answer the original question. +The accuracy of \textsc{dirt} ranges from \(92.5\%\) for the relation ``\textsl{manufactures}'' to \(0\%\) for the relation ``\textsl{monetary value of}'' for which no similar paths were found. + +\subsection{Hand-designed Feature Extractors} +\label{sec:relation extraction:hand-designed features} +The first supervised systems for relation extraction were designed for the template relations (\textsc{tr}) task of the seventh message understanding conference (\textsc{muc-7}). +\begin{marginparagraph} + To put these results into perspective with later work, note that \textcite{ie2} mention they ran their model on a 167\,\textsc{mh}z processor with 128\,\textsc{mb} of \textsc{ram}. +\end{marginparagraph} +The best result was obtained by the \(\textsc{ie}^2\) system \parencite{ie2}, which relied on manual pattern development, with an \fone{} score of 76\%. +A close second was the 71\% \fone{} score of the \textsc{sift} system \parencitex{sift}, which was devoid of hand-written patterns. +\textsc{sift} builds an augmented parse tree of the sentence, where nodes are added to encode the semantic information conveyed by each constituent. +New nodes are created using an algorithm akin to a probabilistic context-free grammar using maximum likelihood. +The semantic annotations are chosen following co-occurrence counts in the training set, using dynamic programming to search the space of augmented parse trees efficiently. +\textsc{sift} also uses a model to find cross-sentence relations, which represent 10--20\% of the test set. +The predictions are made from a set of elemental features, one of which was whether the candidate fact was seen in a previous sample; this gives a slight aggregate orientation to \textsc{sift}, even though it is primarily a sentential approach (Section~\ref{sec:relation extraction:definition}). +This first systematic evaluation of models on the same dataset set the stage for the development of the relation extraction task. + +Subsequently, several methods built upon carefully designed features. +This is for example the case of \textcitex{maximum_entropy_re} who use the maximum entropy principle on the following set of features: +\begin{marginfigure} + \centering + \input{mainmatter/relation extraction/syntactic parse tree.tex} + \scaption[Example of syntactic parse tree.]{ + Example of syntactic parse tree generated by the \textsc{pcfg} parser \parencite{pcfg}. + The following abbreviations are used: + \texttt{S} (simple declarative clause), + \texttt{NP} (noun phrase), + \texttt{VP} (verb phrase), + \texttt{ADJP} (adjective phrase), + \texttt{NNS} (plural noun), + \texttt{NNP} (singular proper noun), + \texttt{VBD} (past tense verb), + \texttt{RB} (adverb), + \texttt{JJ} (adjective). + In contrast to a dependency tree (Figure~\ref{fig:relation extraction:dependency tree}), the words correspond to the tree's leaves, while internal nodes correspond to constituent clauses.
+ \label{fig:relation extraction:syntactic parse tree} + } +\end{marginfigure} +\begin{itemize} + \item entities and infix words with positional markers, + \item entity types by applying \textsc{ner} to the corpus, + \item entity levels, that is whether the entity is a composite noun or a pronoun which was linked to an entity through coreference resolution, + \item the number of other words and entities appearing between \(e_1\) and \(e_2\), + \item whether \(e_1\) and \(e_2\) are in the same noun phrase, verb phrase or prepositional phrase, + \item the dependency neighborhood, that is the neighboring nodes in the dependency tree (see Figure~\ref{fig:relation extraction:dependency tree}), + \item the syntactic path, that is the path between the entities in the syntactic parse tree (see Figure~\ref{fig:relation extraction:syntactic parse tree}). +\end{itemize} +Let us call \((f_i(x, r))_{i\in\{1,\dotsc,n\}}\) the indicator functions which equal 1 iff \(x\) has feature \(i\) and conveys \(r\). +The maximum entropy principle states that a classifier should match empirical data on the observed space but should have maximal entropy outside it. +Calling \(Q^*\) the optimal probability model in this sense, we have: +\begin{align*} + Q^* & = \argmax_{Q\in \symcal{Q}} \entropy(Q) \\ + & = \argmax_{Q\in \symcal{Q}} \sum_{(x,r)\in\dataSet} - Q(x, r) \log Q(r\mid x) \\ + & = \argmax_{Q\in \symcal{Q}} \sum_{(x,r)\in\dataSet} - \empP(x) Q(r\mid x) \log Q(r\mid x), +\end{align*} +\begin{marginparagraph}[-12mm] + As a reminder, \(\empP\) denotes the empirical distribution. +\end{marginparagraph} +where \(\symcal{Q}\) is the set of probability mass functions matching observations: +\begin{equation*} + \symcal{Q} = \left\{\, \text{p.m.f.\ } Q \,\middle|\, \expectation_{(x, r)\sim Q}[f_i(x, r)] = \expectation_{(x, r)\sim \empP}[f_i(x, r)] \,\right\}. +\end{equation*} +Given this setup, the solution is part of a very restricted class of functions: +\begin{equation*} + Q^*(r\mid x; \vctr{\lambda}) \propto \displaystyle\exp \sum_{i=1}^n \lambda_i f_i(x, r). +\end{equation*} +The parameters \(\vctr{\lambda}\) are estimated using an algorithm called generalized iterative scaling (\textsc{gis}, \cite{gis}). +Using this approach, \textcite{maximum_entropy_re} evaluate their model on a dataset succeeding \textsc{muc-7} called \textsc{ace} (to be precise, \textsc{ace}~2003, see Section~\ref{sec:datasets:ace} for details). +They achieve an \fone{} of 52.8\% on 24 \textsc{ace} relation subtypes. + +\subsection{Kernel Approaches} +\label{sec:relation extraction:kernel} +Designing a set of low-dimensional features is a tedious task: a large set of features can be computationally prohibitive, while a small set of features is necessarily limiting since they can never completely capture the essence of all samples, which live in a higher-dimensional space. +The kernel approaches seek to avoid this limitation by comparing samples pairwise without passing through an explicit intermediary representation. +To do so, a kernel function \(k\) is defined over pairs of samples: +\begin{equation*} + k\colon (\sentenceSet\times\entitySet^2)\times(\sentenceSet\times\entitySet^2)\to\symbb{R}_{\geq 0}, +\end{equation*} +where \(k\) acts as a similarity measure and is required to be symmetric and positive-semidefinite. +It can be shown that there is an equivalence between kernel functions and feature spaces; for each kernel function \(k\) there is an implicit set of features \(\vctr{f}\) such that \(k(x_1, x_2) = \vctr{f}(x_1)\cdot\vctr{f}(x_2)\).
+However, some kernel function \(k\) might be computed without having to enumerate all features \(\vctr{f}\). + +This property is used for relation extraction by \textcitex{kernel_parse} who define a similarity function \(k\) between shallow parse trees.% +\sidenote{A shallow parse tree is similar to a syntactic parse tree (Figure~\ref{fig:relation extraction:syntactic parse tree}) on a partition of the words of a sentence \parencite{shallow_parse_tree}.} +The tree kernel is defined through a similarity on nodes with a recursive call on children nodes. +The equivalent feature space would need to contain all possible sub-trees which are impractical to enumerate. +\Textcite{kernel_parse} train a support vector machine (\textsc{svm}, \cite{svm}) and a voted perceptron \parencite{voted_perceptron} on a dataset they hand-labeled. +\Textcitex{kernel_dependency} used a similar approach with a tree kernel, except that they used dependency trees (Figure~\ref{fig:relation extraction:dependency tree}) instead of syntactic parse trees. +They trained \textsc{svm}s on the \textsc{ace}~2004 dataset (Section~\ref{sec:datasets:ace}), with their best setup reaching an \fone{} of 63.2\%. +Finally, \textcitex{kernel_exploring} also trained an \textsc{svm} but directly used the dot product inside the feature space as a kernel.% +\sidenote{In the same way that a kernel always corresponds to the dot product in a feature space, the reverse can be shown to be true too, since a Gram matrix is always semidefinite positive.} +Extracting a wide variety of features, they were able to reach an \fone{} score of 74.7\% on the \textsc{ace}~2004 dataset. + +\subsection{Piecewise Convolutional Neural Network} +\label{sec:relation extraction:pcnn} +In the 2010s, machine learning models moved away from hand-designed features towards automatic feature extractors (Section~\ref{sec:context:history}). +In relation extraction, this move was initiated by \textcite{rmvs} using an \textsc{rnn}-like model (Section~\ref{sec:context:rnn}), but it really started to gain traction with piecewise convolutional neural networks (\textsc{pcnn}, \citex{pcnn}). +\textsc{pcnn}s perform supervised relation extraction using deep learning. +In contrast to previous models, they learn a \textsc{cnn} feature extractor (Section~\ref{sec:context:cnn}) on top of word2vec embeddings (Section~\ref{sec:context:word2vec}) instead of using hand-engineered features. +Furthermore, \textsc{pcnn} uses the split-in-three-affixes method of \textsc{dipre} (Figure~\ref{fig:relation extraction:dipre split}). +They feed each affix to a \textsc{cnn} followed by a max-pooling to obtain a fixed-length representation of the sentence, which depends on the position of the embeddings. +This representation is then used to predict the relation using a linear and softmax layer. +While the global position invariance of \textsc{cnn} is interesting for language modeling, phrases closer to entities might be of more importance for relation extraction, thus \textsc{pcnn} also uses temporal encoding (Section~\ref{sec:context:attention lm}). +Figure~\ref{fig:relation extraction:pcnn} showcases a \textsc{pcnn} model. + +\begin{figure*}[ht!] + \centering + \input{mainmatter/relation extraction/pcnn.tex} + \scaption[Architecture of a \textsc{pcnn} model.]{ + Architecture of a \textsc{pcnn} model. + The model is only given a sentence that was split into three pieces; entities are ignored. + The embeddings of the words in each piece are concatenated with two positional embeddings. 
+ Each piece is then fed to a convolutional layer, and a linear layer merges the three representations together. + At the softmax output, we obtain a probability distribution over possible relations given the sentence. + \label{fig:relation extraction:pcnn} + }[-2cm] +\end{figure*} + +The setup described above can be used for sentential relation extraction. +However, \textcite{pcnn} and subsequent works place themselves in the aggregate setup. +Therefore, we will wait until Section~\ref{sec:relation extraction:pcnn aggregate} to delve into the training algorithm and experimental results of \textsc{pcnn}s. + +\subsection{Transformer-based Models} +\label{sec:relation extraction:mtb sentential} +Following the progression of Section~\ref{sec:context:sentence}, \textsc{cnn}-based models were soon replaced by transformer-based models. +\Textcitex{mtb} introduce the unsupervised matching the blanks (\textsc{mtb}) model together with an in-depth study on the use of transformers for relation extraction. +We will focus on the transformer extractor in this section and study the unsupervised model in Section~\ref{sec:relation extraction:mtb}. +\Textcite{mtb} introduce several methods to extract an entity-aware representation of a sentence using \textsc{bert} (Section~\ref{sec:context:transformers}). +These different methods can be characterized along two axes: +\begin{description} + \item[Entity Span Identification,] that is, how the entities are marked in the sentence. + This can be \emph{none}, meaning that the entities are not differentiated from the other words in the sentence. + It can be through \emph{entity markers}, i.e.~new tokens are introduced to mark the two entities' beginning and end, as showcased by Figures~\ref{fig:relation extraction:epgnn sentence representation} and~\ref{fig:relation extraction:emes}. + Finally, it can be through a special feature of \textsc{bert}: \emph{token type embeddings}; in this case, the embeddings of the entity tokens are added to another embedding representing the slot---either \(e_1\) or \(e_2\)---of the entity. + \item[Output Construction,] that is, how a fixed-size representation is obtained from the sequence of token embeddings. + A first approach is to simply use the \textsc{cls} \emph{token} embedding, i.e.~the embedding of the sequence's first token, which should encompass the semantics of the whole sentence (Section~\ref{sec:context:transformers}). + A second approach is to use \emph{entity max-pooling}: each entity is represented by the component-wise maximum over its token embeddings, and the sentence is represented by the concatenation of its entities' representations. + A variant of this, using mean pooling combined with the \textsc{cls} method, is used by \textsc{epgnn} (Figure~\ref{fig:relation extraction:epgnn sentence representation}). + These representations should better capture the semantics surrounding the entities, in contrast to the \textsc{cls} token, which captures the semantics of the whole sentence. + Finally, a last option is to use the embeddings of the \emph{entity start markers}; this is the option illustrated by Figure~\ref{fig:relation extraction:emes} and has the advantage of lessening the dependence of the representation on the entity surface form (Section~\ref{sec:relation extraction:entity} describes why this could be desirable).
+\end{description} + +\begin{figure}[th] + \centering + \input{mainmatter/relation extraction/emes.tex}% + \scaption*[\textsc{mtb} entity markers--entity start sentence representation.]{ + \textsc{mtb} entity markers--entity start sentence representation. + ``Bentham'' was split into two subword tokens, ``Ben-'' and ``-tham'' by the \textsc{bpe} algorithm described in Section~\ref{sec:context:bpe}. + The contextualized embeddings of most words are ignored. + The final representation is only built using the representation of \texttt{<e1>} and \texttt{<e2>}. + However, note that these representations are built from all the words in the sentence using an attention mechanism (Section~\ref{sec:context:attention}). + In the original work of \textcite{mtb}, the representation extracted by \textsc{bert} is either fed through layer normalization \parencite{layernorm} or to a linear layer depending on the dataset. + \label{fig:relation extraction:emes} + } +\end{figure} + +The best results obtained by \textsc{mtb} were with the entity markers--entity start method. +This is the method we focus on from now on. +We refer to this sentence representation model by the function \(\bertcoder\colon\sentenceSet\to\symbb{R}^d\) illustrated Figure~\ref{fig:relation extraction:emes}. +Training is performed using a softmax layer of size \(|\relationSet|\) with a cross-entropy loss. +Using a standard \bertArch{large} pre-trained on a \textsc{mlm} task, \textsc{mtb} obtains a macro-\(\overHalfdirected{\fone}\) of 89.2\% on the SemEval 2010 Task 8 (Section~\ref{sec:datasets:semeval}). diff --git a/mainmatter/relation extraction/supervised metrics.tex b/mainmatter/relation extraction/supervised metrics.tex @@ -0,0 +1,25 @@ +\begin{tikzpicture}[braced/.style={decorate,decoration={brace,amplitude=2mm}}] + \path[fill=black!10] (1.6, 0) -- (2.4, 0) -- (2.4, 4) -- (1.6, 4) -- cycle; + + \path[fill=Dark2-A!30] (0, 2) -- (4, 2) -- (4, 2.4) -- (0, 2.4) -- cycle; + \path[fill=Dark2-C!30] (0, 2) -- (4, 2) -- (4, 1.6) -- (0, 1.6) -- cycle; + + \path[fill=Dark2-A] (2, 2) -- (2, 2.4) -- (1.6, 2.4) -- (1.6, 2) -- cycle; + \path[fill=Dark2-C] (2, 2) -- (2, 1.6) -- (2.4, 1.6) -- (2.4, 2) -- cycle; + + \path[pattern=crosshatch dots, pattern color=Dark2-B] (2, 2.4) -- (2.4, 2.4) -- (2.4, 2) -- (1.6, 2) -- (1.6, 1.6) -- (2, 1.6) -- cycle; + + \draw (0, 0) -- (4, 0) -- (4, 4) -- (0, 4) -- cycle; + + \node[anchor=south west] (gt) at (4, 2) {\vphantom{\(\breve{r}\)}\(r\)}; + \node[anchor=north west] (gb) at (4, 2) {\(\breve{r}\)}; + \node[anchor=north east] (cl) at (2, 0) {\vphantom{\(\breve{r}\)}\(r\)}; + \node[anchor=north west] (cr) at (2, 0) {\(\breve{r}\)}; + \node[left=0mm of cl] {\(\cdots\)}; + \node[right=0mm of cr] {\(\cdots\)}; + \node[above=0mm of gt] {\(\vdots\)}; + \node[below=0mm of gb] {\(\vdots\)}; + + \draw[braced] (0cm, 41mm) -- (4cm, 41mm) node[midway,above,yshift=2mm] {\(c\)}; + \draw[braced] (-1mm, 0) -- (-1mm, 4) node[midway,left,xshift=-2mm] {\(g\)}; +\end{tikzpicture}% diff --git a/mainmatter/relation extraction/supervised samples.tex b/mainmatter/relation extraction/supervised samples.tex @@ -0,0 +1,9 @@ +\begin{tabularx}{\textwidth}{@{} P{13mm} P{17mm} P{17mm} Y @{}} + \toprule + Head & Relation & Tail & Sentence \\\midrule + \wdent{210175}\newline \textsc{mi5} & \wdrel{159}\newline headquarters location & \wdent{198519}\newline Thames House & The exterior and interior of Freemasons' Hall continued to be a stand-in for \utail{Thames House}, the headquarters of \uhead{\textsc{mi5}}. 
\\\addlinespace[1mm] + \wdent{210175}\newline \textsc{mi5} & \wdrel{101}\newline field of work & \wdent{501700}\newline counter\-intelligence & Golitsyn's claims about Wilson were believed in particular by the senior \uhead{\textsc{mi5}} \utail{counterintelligence} officer Peter Wright. Wright, Peter (1987) \\\addlinespace[1mm] + \wdent{158363}\newline \textsc{smersh} & \wdrel{101}\newline field of work & \wdent{501700}\newline counter\-intelligence & In its \utail{counter-espionage} and counter-intelligence roles, \uhead{\textsc{smersh}} appears to have been extremely successful throughout World War II. \\\addlinespace[1mm] + \wdent{198519}\newline Thames House & \wdrel{466}\newline occupant & \wdent{210175}\newline \textsc{mi5} & The Freemasons' Hall in London served as the filming location for \uhead{Thames House}, the headquarters for \utail{\textsc{mi5}}. \\ + \bottomrule +\end{tabularx}% diff --git a/mainmatter/relation extraction/supervised.tex b/mainmatter/relation extraction/supervised.tex diff --git a/mainmatter/relation extraction/supervision.tex b/mainmatter/relation extraction/supervision.tex @@ -0,0 +1,137 @@ +\section{The Problem of Data Scarcity} +\label{sec:relation extraction:supervision} +Ideally, a labeled dataset should be available for the source language and target relation domain \(\relationSet\), but alas, this is rarely the case. +In particular, the order of \(\relationSet\) can be in the thousands, in which case accurate labeling is tedious for human operators. +To circumvent this problem, alternative supervision strategies have been used. + +Despite the ubiquity of the terms, it is not easy to define the different forms of supervision clearly. +We use the following practical definition: a dataset is supervised if among its features, one---the labels---must be predicted from the others. +Furthermore, to distinguish it from the self-supervised setup, we need to impose that the labels be somewhat hard to obtain, typically through manual annotation.% +\sidenote[][-30mm]{ + To add to the confusion, the distinction between self-supervised and unsupervised is not necessarily pertinent, e.g.~Yann LeCun retired ``unsupervised'' from his vocabulary, replacing it with ``self-supervised'' \parencite{selfsupervised}. + In this case, the difficulty of obtaining the labels might be the sole difference between the ``unsupervised/self-supervised'' and ``supervised'' setups. +} +For the task at hand, a supervised dataset takes the form \(\dataSet_\relationSet\subseteq\sentenceSet\times\entitySet^2\times\relationSet\): indeed, we seek to predict relation labels, and obtaining those is tedious and error-prone. +On the other hand, an unsupervised dataset takes the form \(\dataSet\subseteq\sentenceSet\times\entitySet^2\), which is much easier to obtain: vast amounts of text are now digitized and can be processed by an entity chunker and an entity linker. +An intermediate setting is semi-supervision, in which a small subset of samples is supervised while the others are left unsupervised; this can be stated as \(\dataSet_\text{semi}\subseteq\sentenceSet\times\entitySet^2\times(\relationSet\cup\{\varepsilon\})\).% +\sidenote[][-24mm]{ + Here, we denote by \(\varepsilon\) the absence of a label for a sample since this is often reflected by an empty field. +} + +Despite these different kinds of datasets on which a relation extraction model can be trained, evaluating such a model is nearly always done using a supervised dataset \(\dataSet_\relationSet\).
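To make these different dataset forms concrete, here is a rough sketch of them as Python type annotations; the names are ours, and the string encodings (Wikidata-style identifiers) are only an assumption for illustration.

from typing import Optional, Set, Tuple

Sentence = str
Entity = str          # e.g. a Wikidata identifier such as "Q210175" (MI5)
Relation = str        # e.g. "P159" (headquarters location)

# Unsupervised: D subset of S x E^2, sentences with linked entity pairs only.
UnsupervisedSample = Tuple[Sentence, Entity, Entity]

# Supervised: D_R subset of S x E^2 x R, each sample carries a relation label.
SupervisedSample = Tuple[Sentence, Entity, Entity, Relation]

# Semi-supervised: the label may be missing; epsilon is modeled as None.
SemiSupervisedSample = Tuple[Sentence, Entity, Entity, Optional[Relation]]

UnsupervisedDataset = Set[UnsupervisedSample]
SupervisedDataset = Set[SupervisedSample]
SemiSupervisedDataset = Set[SemiSupervisedSample]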
+In this section, we present two other approaches to train a model without manual labeling: bootstrap and distant supervision. + +\subsection{Bootstrap} +\label{sec:relation extraction:bootstrap} +\begin{marginalgorithm}[-5cm] + \input{mainmatter/relation extraction/bootstrap algorithm.tex} + \scaption[The bootstrap algorithm.]{ + The bootstrap algorithm. + Occurrences are simply a set of samples \(O\subseteq\dataSet\) conveying the target relation. + The algorithm can be either seeded with a set of occurrences \(O\) \parencite{dipre} or a set of rules \(R\) \parencite{hearst_hyponyms}. + When starting with a set of occurrences, the algorithm must first extract a set of rules, then alternate between finding occurrences and rules as listed. + \label{alg:relation extraction:bootstrap} + } +\end{marginalgorithm} + +Another method to deal with the scarcity of data is to use bootstrap. +Early approaches to relation extraction often focused on a single relation and fell into this category of bootstrapped methods. +The bootstrap process (Algorithm~\ref{alg:relation extraction:bootstrap}) starts with a small amount of labeled data and finds extraction rules by generalizing to a large amount of unlabeled data. +As such, it is a semi-supervised approach. +We now describe this algorithm by following the work that pioneered this approach. + +\textcite{hearst_hyponyms} propose a method to detect a single relation between noun phrases: hyponymy. +They define \(e_1\) to be a hyponym of \(e_2\) when the sentence ``An \(e_1\) is a (kind of) \(e_2\).'' is acceptable to an English speaker. +This relation is then detected inside a corpus using lexico-syntactic patterns such as:% +\sidenote{The syntax used here is inspired by regular expressions: ``\texttt{()}'' are used for grouping, ``\texttt{?}'' indicates the previous atom is optional, ``\texttt{|}'' is used for alternatives and ``\texttt{*}'' is the Kleene star meaning zero or more repetitions.} +\begin{indentedexample} + \raggedright + \(e_1\) ,\texttt{?} including \texttt{(\textrm{\(e_2\),})*} \texttt{(\textrm{or}|\textrm{and})?} \(e_3\)\\ + \(\implies \tripletHolds{e_2}{\textsl{hyponym of\/}}{e_1}\)\\ + \(\implies \tripletHolds{e_3}{\textsl{hyponym of\/}}{e_1}\)\\ +\end{indentedexample} +where the entities \(e_i\) are constrained to be noun phrases. +This rule matches on the following sentence: +\begin{indentedexample} + \raggedright + All common-law countries, including Canada and England\dots\\ + \(\implies \tripletHolds{\text{Canada}}{\textsl{hyponym of\/}}{\text{Common-law country}}\)\\ + \(\implies \tripletHolds{\text{England}}{\textsl{hyponym of\/}}{\text{Common-law country}}\)\\ +\end{indentedexample} + +\Textcitex{hearst_hyponyms} propose the following process: start with known facts such as \(\operatorname{hyponym}(\text{England}, \text{Country})\), find all places where the two entities co-occur in the corpus and write new rules from the patterns observed, which allows them to discover new facts with which to repeat the process. +Aside from some basic lemmatization---which explains why ``countries'' became ``country'' in the example above---all noun phrases are treated as possible entities. +This is sensible since the end goal of the approach is to generate new facts for the WordNet knowledge base. +In \textcite{hearst_hyponyms}, writing new rules was not automated; it was performed manually.
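As a rough illustration of how such a lexico-syntactic pattern can be implemented, here is a sketch of the ``including'' rule above as a Python regular expression over raw text; it approximates noun phrases by capitalized word sequences and skips lemmatization, so it is only a toy version of the manually written rules, not the original implementation.

import re

# Toy noun phrase: a capitalized word, optionally followed by more words (lazy).
NP = r"[A-Z][\w-]*(?:\s+[\w-]+)*?"

# "e1 ,? including (e2 ,)* (or|and)? e3"
INCLUDING = re.compile(
    rf"(?P<hypernym>{NP})\s*,?\s+including\s+"
    rf"(?P<hyponyms>{NP}(?:\s*,\s*{NP})*(?:\s*,?\s*(?:and|or)\s+{NP})?)"
)

def extract_hyponyms(sentence: str):
    # Returns (hyponym, "hyponym of", hypernym) triplets found by the pattern.
    match = INCLUDING.search(sentence)
    if not match:
        return []
    hypernym = match.group("hypernym")
    hyponyms = re.split(r"\s*,\s*|\s+and\s+|\s+or\s+", match.group("hyponyms"))
    return [(h, "hyponym of", hypernym) for h in hyponyms if h]

# extract_hyponyms("All common-law countries, including Canada and England, follow this principle.")
# -> [('Canada', 'hyponym of', 'All common-law countries'),
#     ('England', 'hyponym of', 'All common-law countries')]

A real system would match over noun-phrase chunks produced by a parser and lemmatize the extracted phrases (``countries'' to ``country''), as described in the paragraph above.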
+ +Following equation~\ref{eq:relation extraction:sentential definition}, a sentential relation extraction system usually defines a relation \(r\) as a subset of \(\sentenceSet\times\entitySet\times\entitySet\), i.e.\ relations are conveyed jointly by sentences and entity pairs. +In contrast, \textcite{hearst_hyponyms} make the following assumption: +\begin{marginparagraph} + The assumption of \textcite{hearst_hyponyms} is that there are two morphisms \(\sentenceSet\to\relationSet\) and \(\entitySet^2\to\relationSet\); therefore, \(\dataSet\) must have a form which makes this decomposition possible: \((s, \vctr{e})\in\dataSet\) if and only if \(s\) and \(\vctr{e}\) are mapped to the same relation. + In other words, \(\dataSet\) completes the two relation extraction morphisms to a commutative square: + \begin{center} + \input{mainmatter/relation extraction/pullback.tex} + \end{center} + In category theory, this object is called a pullback and is denoted \(\times_\relationSet\). + This also means that given a sample from \(\dataSet\), it is possible to find its relation without looking at its sentence or its entities since either of them is sufficient. +\end{marginparagraph} +\begin{assumption}{pullback} + It is possible to find the relation conveyed by a sample by looking at the entities alone and ignoring the sentence; and conversely by looking at the sentence alone and ignoring the entities. + + \smallskip + \noindent + \(\dataSet = \sentenceSet\times_\relationSet\entitySet^2.\) +\end{assumption} +This implies that, given a pair of entities, whatever the sentence in which they appear, the conveyed relation is the same. +Conversely, given a sentence, the conveyed relation is always the same, whatever the entities. +As such, the representation of a relation is split into two parts: +\begin{description} + \item[a set of entity pairs] \(r_\entitySet \subseteq \entitySet^2\), which can be represented exactly; + \item[a set of sentences] \(r_\sentenceSet \subseteq \sentenceSet\), which in \textcite{hearst_hyponyms} was represented by a set of patterns matching only sentences in \(r_\sentenceSet\), such as ``\(e_1\) ,\texttt{?} including \texttt{(\textrm{\(e_2\),})*} \texttt{(\textrm{or}|\textrm{and})?} \(e_3\).'' +\end{description} +Given a dataset \(\dataSet\subseteq\sentenceSet\times\entitySet^2\), it is possible to map from \(r_\entitySet\) to \(r_\sentenceSet\) by taking all sentences where the two entities appear and vice-versa by taking all pairs of entities appearing in the given sentences. +The second process \(\relationSet_\sentenceSet\times \dataSet\to \relationSet_\entitySet\) is straightforward to implement exhaustively, while the first process \(\relationSet_\entitySet\times \dataSet\to \relationSet_\sentenceSet\) was performed manually by \textcite{hearst_hyponyms}. + +\subsection{Distant Supervision} +\label{sec:relation extraction:distant supervision} +\textcitex{distant_early}[-5mm] introduced the idea of weak supervision to relation extraction as a compromise between hand-labeled datasets and unsupervised training. +It was then popularized by \textcitex{distant} under the name \emph{distant supervision}. +Their idea is to use a knowledge base \(\kbSet\subseteq\entitySet^2\times\relationSet\) to supervise an unsupervised dataset \(\dataSet\). +The underlying assumption can be stated as: +\begin{marginparagraph} + The use of assumptions or modeling hypotheses noted \hypothesis{name} is central to several relation extraction models, especially unsupervised ones.
+ We strongly encourage the reader to look at the list of assumptions in Appendix~\ref{chap:assumptions}. + The appendix provides counter-examples when appropriate. + Furthermore, it lists the sections in which each assumption was introduced for reference. +\end{marginparagraph} +\begin{assumption}{distant} + A sentence conveys all the possible relations between all the entities it contains. + + \smallskip + \noindent + \(\dataSet_\relationSet = \dataSet \bowtie \kbSet\) + + \smallskip + \noindent + where \(\bowtie\) denotes the natural join operator: + \begin{equation*} + \dataSet \bowtie \kbSet = + \left\{\, + (s, e_1, e_2, r) + \mid + (s, e_1, e_2)\in\dataSet + \land + (e_1, e_2, r)\in\kbSet + \,\right\}. + \end{equation*} +\end{assumption} +In other words, each sentence \((s, e_1, e_2)\in\dataSet\) is labeled by all relations \(r\) present between \(e_1\) and \(e_2\) in the knowledge base \(\kbSet\). +This is sometimes referred to as an unaligned dataset, since sentences are not aligned with their corresponding facts. +The assumption \hypothesis{distant} is quite obviously false, and is only used to build a supervised dataset. +A classifier is then trained on this dataset. +In most works, including the one of \textcite{distant}, the model is designed to handle the vast amount of false positive in \(\dataSet\bowtie\kbSet\), usually through the aggregate extraction setting (see Section~\ref{sec:relation extraction:definition}). + +A caveat of distantly supervised datasets is that evaluation is often complex. +\Textcite{distant} evaluate their approach on Freebase (Section~\ref{sec:datasets:freebase}) by holding-out part of the knowledge base. +However, the number of false negatives forces them to manually label the facts as true or false themselves. diff --git a/mainmatter/relation extraction/syntactic parse tree.tex b/mainmatter/relation extraction/syntactic parse tree.tex @@ -0,0 +1,52 @@ +\begin{tikzpicture}[ + word after/.style={right=0.9mm of #1, inner sep=0}, + punctuation after/.style={right=0mm of #1, inner sep=0}, + POS/.style={inner sep=0.4mm}, + node to word/.style={dashed}, + arc label/.style={pos=0.5,sloped,above}, + ] + % (ROOT (S (NP (NNP Syrielle)) (VP (VBD ate) (NP (ADJP (RB too) (JJ many)) (NNS potatoes))) (. 
.))) + \node[inner sep=0] (john) {\strut John}; + \node[word after=john] (ate) {\strut ate}; + \node[word after=ate] (too) {\strut too}; + \node[word after=too] (many) {\strut many}; + \node[word after=many] (tomatoes) {\strut tomatoes}; + \node[punctuation after=tomatoes] (period) {\strut .}; + + \coordinate (lvl0) at (0, 3.75); + \coordinate (lvl1) at (0, 3); + \coordinate (lvl2) at (0, 2.25); + \coordinate (lvl3) at (0, 1.5); + \coordinate (lvl4) at (0, 0.75); + + \node[POS] (NNP) at (lvl2-|john) {\texttt{NNP}}; + \node[POS] (VBD) at (lvl2-|ate) {\texttt{VBD}}; + \node[POS] (RB) at (lvl4-|too) {\texttt{RB}}; + \node[POS] (JJ) at (lvl4-|many) {\texttt{JJ}}; + \node[POS] (NNS) at (lvl3-|tomatoes) {\texttt{NNS}}; + + \foreach \word/\constituant in {john/NNP, ate/VBD, too/RB, many/JJ, tomatoes/NNS}{ + \draw[node to word] (\word) -- (\constituant); + } + + \coordinate (ADJPx) at ($(RB)!0.5!(JJ)$); + \coordinate (NP2x) at ($(ADJPx)!0.5!(NNS)$); + \coordinate (VPx) at ($(NP2x)!0.5!(VBD)$); + \coordinate (Sx) at ($(VPx)!0.5!(NNP)$); + + \node[POS] (ADJP) at (lvl3-|ADJPx) {\texttt{ADJP}}; + \node[POS] (NP2) at (lvl2-|NP2x) {\(\texttt{NP}_2\)}; + \node[POS] (VP) at (lvl1-|VPx) {\texttt{VP}}; + \node[POS] (NP1) at (lvl1-|NNP) {\(\texttt{NP}_1\)}; + \node[POS] (S) at (lvl0-|Sx) {\texttt{S}}; + + \draw[arrow] (S) -- (NP1); + \draw[arrow] (NP1) -- (NNP); + \draw[arrow] (S) -- (VP); + \draw[arrow] (VP) -- (VBD); + \draw[arrow] (VP) -- (NP2); + \draw[arrow] (NP2) -- (ADJP); + \draw[arrow] (ADJP) -- (RB); + \draw[arrow] (ADJP) -- (JJ); + \draw[arrow] (NP2) -- (NNS); +\end{tikzpicture} diff --git a/mainmatter/relation extraction/universal schema.tex b/mainmatter/relation extraction/universal schema.tex @@ -0,0 +1,70 @@ +\begin{tikzpicture}[ + user/.style={text width=2cm, anchor=west, inner sep=0mm}, + item/.style={anchor=south west, inner sep=0mm,rotate=45}, + braced/.style={decorate,decoration={brace,amplitude=2mm}}, + known/.style={fill=Dark2-A!30,minimum width=1.5cm,minimum height=1cm}, + predicted/.style={fill=Dark2-B!30,minimum width=1.5cm,minimum height=1cm}, + unknown/.style={fill=Dark2-C!30,minimum width=1.5cm,minimum height=1cm}, + ] + \NewDocumentCommand\epairhead{m m m}{ + \pgfmathparse{-#1+0.5} + \node[user] (userh#1) at (-1.7, \pgfmathresult) {\footnotesize#2\textsubscript{\(e_1\)}\\#3\textsubscript{\(e_2\)}}; + \coordinate (user#1) at (0, \pgfmathresult); + } + \NewDocumentCommand\surfacehead{m m}{ + \pgfmathparse{1.5*#1-1} + \node[item] (itemh#1) at (\pgfmathresult, 0) {\footnotesize``\(e_1\) #2 \(e_2\)''}; + \pgfmathparse{1.5*#1-0.75} + \coordinate (item#1) at (\pgfmathresult, 0); + } + \NewDocumentCommand\relationhead{m m}{ + \pgfmathparse{1.5*#1-1} + \node[item] (itemh#1) at (\pgfmathresult, 0) {\footnotesize\(e_1\mathop{\textsl{#2\/}}e_2\)}; + \pgfmathparse{1.5*#1-0.75} + \coordinate (item#1) at (\pgfmathresult, 0); + } + + \NewDocumentCommand\matrixvalue{m m m m}{ + \node[#1] at (user#2-|item#3) {#4}; + } + + \epairhead{1}{Ferguson}{Harvard} + \epairhead{2}{Oman}{Oxford} + \epairhead{3}{Firth}{Oxford} + \epairhead{4}{Gödel}{Princeton} + \surfacehead{1}{professor at} + \surfacehead{2}{historian at} + \relationhead{3}{employee of} + \relationhead{4}{member of} + + \draw[braced] (2.95, -4.1) -- (0, -4.1) node[midway,below,yshift=-2mm] {Surface forms}; + \draw[braced] (6, -4.1) -- (3.05, -4.1) node[midway,below,yshift=-2mm] {Relations}; + + \draw[braced] (-1.8, -4) -- (-1.8, 0) node[midway,left,xshift=-2mm] {\rotatebox{90}{Entity pairs}}; + + \draw[braced] (6.1, 0) -- (6.1, -1.95) 
node[midway,right,xshift=2mm] {\rotatebox{-90}{Train}}; + \draw[braced] (6.1, -2.05) -- (6.1, -4) node[midway,right,xshift=2mm] {\rotatebox{-90}{Test}}; + + \matrixvalue{known}{1}{2}{1}; + \matrixvalue{known}{1}{3}{1}; + \matrixvalue{known}{1}{4}{1}; + \matrixvalue{known}{2}{1}{1}; + \matrixvalue{known}{2}{2}{1}; + \matrixvalue{known}{3}{2}{1}; + \matrixvalue{known}{4}{1}{1}; + + \matrixvalue{predicted}{3}{1}{0.95}; + \matrixvalue{predicted}{3}{3}{0.97}; + \matrixvalue{predicted}{3}{4}{0.95}; + \matrixvalue{predicted}{4}{2}{0.05}; + \matrixvalue{predicted}{4}{3}{0.93}; + \matrixvalue{predicted}{4}{4}{0.97}; + + \matrixvalue{unknown}{1}{1}{}; + \matrixvalue{unknown}{2}{3}{}; + \matrixvalue{unknown}{2}{4}{}; + + \draw (0, 0) grid[xstep=1.5,ystep=1] (6, -4); + \draw[ultra thick] (3, 0) -- (3, -4); + \draw[ultra thick] (0, -2) -- (6, -2); +\end{tikzpicture} diff --git a/mainmatter/relation extraction/unsupervised.tex b/mainmatter/relation extraction/unsupervised.tex @@ -0,0 +1,641 @@ +\section{Unsupervised Extraction Models} +\label{sec:relation extraction:unsupervised} +\begin{epigraph} + {Yann LeCun} + {Inaugural Lecture at Collège de France} + {2016} + % This translation comes from a Facebook post made by Yann LeCun himself, but the Collège de France reference should be easier to find. + If intelligence was a cake, unsupervised learning would be the cake, supervised learning would be the icing on the cake, and reinforcement learning would be the cherry on the cake. +\end{epigraph} +In the unsupervised setting, no samples are labeled with a relation, i.e.~all samples are triplets (sentence, head entity, tail entity) from \(\dataSet\subseteq\sentenceSet\times\entitySet^2\). +Furthermore, no information about the relation set \(\relationSet\) is available. +This is problematic since whether a specific semantic link is worthy of appearing in \(\relationSet\) or not is not well defined. +Having so little information about what constitutes a relation makes the problem intractable if we do not impose some restrictions upon \(\relationSet\). +None of the unsupervised models presented in this section is universal: they all make some kind of assumption on the structure of the data or on its underlying knowledge base. +However, developing unsupervised relation extraction models is still interesting for three reasons: they +\begin{itemize*} + \item[(1)] do not necessitate labeled data except for validating the models; + \item[(2)] can uncover new relation types; and + \item[(3)] can be trained from large unlabeled datasets and then fine-tuned for specific relations. +\end{itemize*} + +For all models, we list the important modeling hypotheses, such as \hypothesis{1-adjacency} and \hypothesis{pullback} introduced previously. +Appendix~\ref{chap:assumptions} contains a list of assumptions with some counterexamples and references to the sections where they were introduced. +We strongly encourage the reader to refer to it, especially when the implications of a modeling hypothesis are not immediately clear. + +\subsection{Evaluation} +\label{sec:relation extraction:unsupervised evaluation} +The outputs of unsupervised models vary widely. +The main modes of operation fall into two categories: +\begin{description} + \item[Clustering] A first approach is to cluster the samples such that all samples in the same cluster convey the same relation and samples in different clusters convey different relations.
+ \item[Similarity Space] A second approach is to associate each sample with an element of a vector space equipped with a similarity function. + If two samples are similar in this vector space, they convey similar relations. + This can be seen as a soft version of the clustering approach. +\end{description} + +This distinction has an impact on how we evaluate the models. +In the first case, standard clustering metrics are used. +We introduce \bcubed{} \parencite{bcubed}, V-measure \parencite{v-measure} and \textsc{ari} \parencite{ari} in Section~\ref{sec:relation extraction:clustering}. +They are the most prevalent metrics in cluster evaluation; \bcubed{} in particular is widely used in unsupervised relation extraction. +In the second case, a few-shot evaluation can be used \parencite{fewrel}. +We introduce this approach in Section~\ref{sec:relation extraction:few-shot}. + +A difficulty of evaluating unlabeled clusters is that we do not know which cluster should be compared to which relation. +A possible solution to this problem is to use a small number of labeled samples, which can be used to constrain the output of a model to fall into a specific relation set \(\relationSet\). +This setup is actually similar to semi-supervised approaches such as label propagation (Section~\ref{sec:relation extraction:label propagation}), except that the model must be trained in an unsupervised fashion before being fine-tuned on the supervised dataset. +Similar to the label propagation model evaluation, unsupervised models evaluated by fine-tuning on a supervised dataset usually report performance while varying the number of training labels. +These performances are measured using the standard supervised metrics introduced in Section~\ref{sec:relation extraction:supervised evaluation}. +Evaluating a model as a pre-training method can be done for all unsupervised models, in particular similarity-space-based approaches. + +\subsubsection{Clustering Metrics} +\label{sec:relation extraction:clustering} +In this section, we describe three metrics used to evaluate clustering approaches. +The first metric, \bcubed{}, was introduced to unsupervised relation extraction by rel-\textsc{lda} (\cite{rellda}, Section~\ref{sec:relation extraction:rellda}), while the other two were proposed as complements by \textcite{fitb}, presented in Chapter~\ref{chap:fitb}. + +\begin{epigraph} + {Valve} + {``Portal''} + {2007} + The cake is a lie. +\end{epigraph} +To clearly describe these different clustering metrics, we propose a common probabilistic formulation---in practice, these probabilities are estimated on the validation and test sets---and use the following notations. +Let \(\rndm{X}\) and \(\rndm{Y}\) be random variables corresponding to samples in the dataset. +Following Section~\ref{sec:relation extraction:supervised evaluation}, we denote by \(c(\rndm{X})\) the predicted cluster of \(\rndm{X}\) and \(g(\rndm{X})\) its conveyed gold relation.% +\sidenote{ + This implies that a labeled dataset is sadly necessary to evaluate an unsupervised clustering model. +} + +\paragraph{B\texorpdfstring{\textsuperscript{3}}{³}} +The metric most commonly computed for unsupervised model evaluation is a generalization of \fone{} for clustering tasks called \bcubed{} \parencitex{bcubed}.
+The \bcubed{} precision and recall are defined as follows: +\begin{align*} + \bcubed \operatorname{precision}(g, c) & = \expectation_{\rndm{X},\rndm{Y}\sim\uniformDistribution(\dataSet_\relationSet)} P\left(g(\rndm{X})=g(\rndm{Y}) \mid c(\rndm{X})=c(\rndm{Y})\right) \\ + \bcubed \operatorname{recall}(g, c) & = \expectation_{\rndm{X},\rndm{Y}\sim\uniformDistribution(\dataSet_\relationSet)} P\left(c(\rndm{X})=c(\rndm{Y}) \mid g(\rndm{X})=g(\rndm{Y})\right) \\ +\end{align*} +As precision and recall can be trivially maximized by putting each sample in its own cluster or by clustering all samples into a single class, respectively, the main metric \bcubed{} \fone{} is defined as the harmonic mean of precision and recall: +\begin{equation*} + \bcubed \fone{}(g, c) = \frac{2}{\bcubed{} \operatorname{precision}(g, c)^{-1} + \bcubed{} \operatorname{recall}(g, c)^{-1}} +\end{equation*} + +While the usual precision (Section~\ref{sec:relation extraction:supervised evaluation}) can be seen as the probability that a sample with a given prediction is correct, the \bcubed{} precision cannot use the correct relation as a reference to determine the correctness of a prediction. +Instead, whether an assignment is correct is computed as the expectation that a sample is accurately classified relative to all other samples grouped in the same cluster. + +\paragraph{V-measure} +Another metric is the entropy-based V-measure \parencitex{v-measure}. +This metric is defined by homogeneity and completeness, which are akin to \bcubed{} precision and recall but rely on conditional entropy. +For a cluster to be homogeneous, we want most of its elements to convey the same gold relation. +In other words, the distribution of gold relations inside a cluster must have low entropy. +This entropy is normalized by the unconditioned entropy of the gold relations to ensure that it does not depend on the size of the dataset: +\begin{equation*} + \operatorname{homogeneity}(g, c) = 1 - \frac{\entropy\left(g(\rndm{X})\mid c(\rndm{X})\right)}{\entropy\left(g(\rndm{X})\right)}. +\end{equation*} +Similarly, for a cluster to be complete, we want all the elements conveying the same gold relation to be captured by this cluster. +In other words, the distribution of clusters inside a gold relation must have low entropy: +\begin{equation*} + \operatorname{completeness}(g, c) = 1 - \frac{\entropy\left(c(\rndm{X})\mid g(\rndm{X})\right)}{\entropy\left(c(\rndm{X})\right)}. +\end{equation*} +As with \bcubed{}, the V-measure is summarized by the \fone{} value: +\begin{equation*} + \operatorname{V-measure}(g, c) = \frac{2}{\operatorname{homogeneity}(g, c)^{-1} + \operatorname{completeness}(g, c)^{-1}}. +\end{equation*} +\begin{marginfigure} + \centering + \input{mainmatter/relation extraction/clustering metrics.tex} + \scaption[Comparison of \bcubed{} and V-measure.]{ + Comparison of \bcubed{} and V-measure. + Samples conveying three different relations indicated by shape and color are clustered into three boxes. + The two rows represent two different clusterings: \bcubed{} favors the first one, while V-measure favors the second. + V-measure prefers the second clustering since the blue star cluster is kept pure; on the other hand, the green circle cluster is impure no matter what, so its purity is not taken as much into account by the V-measure compared to \bcubed{}.
+ \label{fig:relation extraction:clustering metrics} + } +\end{marginfigure} +Compared to \bcubed{}, the V-measure penalizes small impurities in a relatively ``pure'' cluster more harshly than in less pure ones. +Symmetrically, it penalizes a degradation of a well-clustered relation more than of a less-well-clustered one. +This difference is illustrated in Figure~\ref{fig:relation extraction:clustering metrics}. + +\paragraph{Adjusted Rand Index} +The Rand index (\textsc{ri}, \cite{ri}) is the last clustering metric we consider, it is defined as the probability that cluster and gold assignments are compatible: +\begin{equation*} + \operatorname{\textsc{ri}}(g, c) = \expectation\limits_{\rndm{X},\rndm{Y}} \left[ P\left( + c(\rndm{X})=c(\rndm{Y}) \Leftrightarrow g(\rndm{X})=g(\rndm{Y}) + \right) \right] +\end{equation*} +In other words, given two samples, the \textsc{ri} is improved when both samples are in the same cluster and convey the same gold relation or when both samples are in different clusters and convey different relations; otherwise, the \textsc{ri} deteriorates. +The adjusted Rand index (\textsc{ari}, \citex{ari}) is a normalization of the Rand index such that a random assignment has an \textsc{ari} of 0, and the maximum is 1: +\begin{equation*} + \operatorname{\textsc{ari}}(g, c) = + \frac{\displaystyle\operatorname{\textsc{ri}}(g, c) - \expectation_{c\sim\uniformDistribution(\relationSet^\dataSet)}[\operatorname{\textsc{ri}}(g, c)]} + {\displaystyle\max_{c\in\relationSet^\dataSet} \operatorname{\textsc{ri}}(g, c) - \expectation_{c\sim\uniformDistribution(\relationSet^\dataSet)}[\operatorname{\textsc{ri}}(g, c)]} +\end{equation*} +In practice, the \textsc{ari} can be computed from the elements of the confusion matrix. +Compared to the previous metrics, \textsc{ari} will be less sensitive to a discrepancy between precision--homogeneity and recall--completeness since it is not a harmonic mean of both. + +\subsubsection{Few-shot} +\label{sec:relation extraction:few-shot} +\begin{marginparagraph} + This section only presents Few-shot evaluation. + It is possible---and quite common---to train a model using a few-shot objective, usually as a fine-tuning phase before a few-shot evaluation. + Since we are mostly interested in unsupervised approaches, we do not delve into few-shot training. + See \textcite{fewrel} for details. +\end{marginparagraph} +Clustering metrics are problematic since producing a clustering with no a priori knowledge on the relation schema \(\relationSet\) leads to unsolvable problems: +\begin{itemize} + \item Should the relation \textsl{sibling} be cut into \textsl{brother} and \textsl{sister}? + \item Is the relation between a country and its capital the same as the one between a county and its seat? + \item Is the ear \textsl{part of} the head in the same fashion that the star Altair is \textsl{part of} the Aquila constellation? +\end{itemize} +All of these questions can be answered differently depending on the design of the underlying knowledge base. +However, unsupervised clustering algorithms do not depend on \(\relationSet\). +They must decide whether ``Phaedra is the sister of Ariadne'' and ``Castor is the brother of Pollux'' go inside the same cluster independently of these design choices. + +Fine-tuning on a supervised dataset solves this problem but adds another. +The evaluation no longer assesses the proficiency of a model to learn from unlabeled data alone; it also evaluates its ability to adapt to labeled samples. 
+Furthermore, the smaller the labeled dataset is, the more results have high variance. +On the other hand, the larger the labeled dataset is, the less the experiment evaluates the unsupervised phase. + +A few-shot evaluation can be used to answer these caveats. +Instead of evaluating a clustering of the samples, few-shot experiments evaluate a similarity function between samples: \(\operatorname{sim}\colon \dataSet\times\dataSet\to\symbb{R}\). +Given a query sample \(x^{(q)}\) and a set of candidates \(\vctr{x}^{(c)}=\{x_i^{(c)}\mid i=1,\dotsc,C\}\), % +\begin{marginparagraph} + \(C\) is the number of candidates, in Table~\ref{tab:relation extraction:few-shot problem} we have \(C=5\). +\end{marginparagraph} +the model is evaluated on whether it is able to find the candidate conveying the same relation as the query. +This is simply reported as an accuracy by comparing \(\argmax_{x\in\vctr{x}^{(c)}} \operatorname{sim}(x^{(q)}, x)\) with the correct candidate. + +\begin{table}[ht!] + \centering + \input{mainmatter/relation extraction/few-shot problem.tex} + \sidecaption[Few-shot problem.]{ + Few-shot problem. + For ease of reading, the entity identifiers---such as \wdent{450036} for ``Hörsel''---are not given. + Both the query and the third candidate convey the relation \wdrel{206} \textsl{located in or next to body of water}. + \label{tab:relation extraction:few-shot problem} + } +\end{table} + +Table~\ref{tab:relation extraction:few-shot problem} gives an example of a few-shot problem. +It illustrates the five-way one-shot problem, meaning that we must choose a relation among five and that each of the five relations is represented by a single sample. +Another popular variant is the ten-way five-shot problem: the candidates are split into ten bags of five samples each, all samples in a bag convey the same relation, and the goal is to predict the bag in which the query belongs. +\begin{marginparagraph} + Quite confusingly, they can also be referred to as ``meta-train'' and ``meta-test.'' + Indeed, to follow the usual semantic of the ``meta-'' prefix, the ``meta-sets'' should refer to sets of \((\text{query}, \text{candidates})\) tuples, not the candidates themselves. +\end{marginparagraph} +Candidates are sometimes referred to as ``train set'' and the query as ``test set'' since this can be seen as an extremely small dataset with five training samples and one test sample. + +FewRel, described in Section~\ref{sec:datasets:fewrel}, is the standard few-shot dataset. +In FewRel, Altair is not \wdrel{361} \textsl{part of} Aquila, it is \wdrel{59} \textsl{part of constellation} Aquila. +However, this design decision does not influence the evaluation. +Given the query ``Altair is located in the Aquila constellation,'' a model ought to rank this sample as more similar to samples conveying \textsl{part of constellation} than to those conveying other kinds of \textsl{part of} relationships. +If FewRel made the opposite design choice, the model would still be able to achieve high accuracy by ensuring \textsl{part of} samples are similar. +The decision to split or not the \textsl{part of} relation should be of no concern to the unsupervised model. + +\subsection{Open Information Extraction} +\label{sec:relation extraction:oie} +In Open information extraction (\textsc{oie}, \citex{oie}), the closed-domain assumption (Section~\ref{sec:relation extraction:domain restriction}) is neither made for relations nor entities, which are extracted jointly. 
+Instead \(\entitySet\) and \(\relationSet\) are implicitly defined from the language itself, typically a fact \((e_1, r, e_2)\) is expressed as a triplet such as (noun phrase, verb phrase, noun phrase). +This makes \textsc{oie} particularly interesting when processing large amounts of data from the web, where there can be many unanticipated relations of interest. + +This section focuses on TextRunner, the first model implementing \textsc{oie}. +It uses an aggregate extraction setup where \(\dataSet\) is directly mapped to \(\kbSet\), with the peculiarity that \(\kbSet\) is defined using surface forms only. +The hypothesis on which TextRunner relies is that the surface form of the relation conveyed by a sentence appears in the path between the two entities in its dependency tree. +In the \textsc{oie} setup, these surface forms can then be used as labels for the conveyed relations, thereby using the language itself as the relation domain \(\relationSet\). +TextRunner can be split into three parts: +\begin{description} + \item[The Learner] is a naive Bayes classifier, trained on a small dataset to predict whether a fact \((e_1, r, e_2)\) is trustworthy. + To extract a set of samples for this task, a dependency parser (Figure~\ref{fig:relation extraction:dependency tree}) is run on the dataset and tuples \((e_1, r, e_2)\) are extracted where \(e_1\) and \(e_2\) are base noun phrases and \(r\) is the dependency path between the two entities. + The tuples are then automatically labeled as trustworthy or not according to a set of heuristics such as the length of the dependency path and whether it crosses a sentence boundary. + The naive Bayes classifier is then trained to predict the trustworthiness of a tuple given a set of hand-engineered features (Section~\ref{sec:relation extraction:hand-designed features}). + \item[The Extractor] extracts trustworthy facts on the whole dataset. + The features on which the Learner is built only depend on part-of-speech (\textsc{pos}) tags (noun, verb, adjective\dots) such that the Extractor does not need to run a dependency parser on all the sentences in the entire dataset. + \begin{marginparagraph} + Dependency parsers tend to be a lot slower than \textsc{pos} taggers. + \end{marginparagraph} + While the Learner uses the dependency path for \(r\), the Extractor uses the infix from which non-essential phrases (such as adverbs) are eliminated heuristically. + Thus the Extractor simply runs a \textsc{pos} tagger on all sentences, finds all possible entities \(e\), estimates a probable relation \(r\) and filters them using the Learner to output a set of trustworthy facts. + \item[The Assessor] assigns a probability that a fact is true from redundancy in the dataset using the urns model of \textcite{textrunner_assessor}. + This model uses a binomial distribution to model the probability that a correct fact appears \(k\) times among \(n\) extractions with a fixed repetition rate. + Furthermore, it assumes both correct and incorrect facts follow different Zipf's laws. + The shape parameter \(s_I\) of the distribution of incorrect facts is assumed to be 1. + While the shape parameter \(s_C\) of the distribution of correct facts as well as the number of correct facts \(N_C\) are estimated using an expectation--maximization algorithm. + \begin{marginparagraph}[-3mm] + Zipf's law comes from the externalist linguistic school. 
+      It follows from the observation that the frequency of the second most common word is half that of the most frequent word, that the frequency of the third most common word is a third of that of the most frequent, and so on.
+      The same distribution can often be observed in information extraction.
+      Zipf's law is parametrized by a shape \(s\) and the number of elements \(N\):
+      \begin{equation*}
+        P(x\mid s) \propto
+        \begin{cases}
+          x^{-s} & \text{ for } x\in\{1,\dotsc,N\} \\
+          0 & \text{ otherwise } \\
+        \end{cases}
+      \end{equation*}
+      A Zipf's law is easily recognizable on a \(\log\)--\(\log\) scale, its probability mass function being a straight line.
+      Take for example the Zipf's law with parameters \(s=2\) and \(N=10\):
+      \begin{center}
+        \input{mainmatter/relation extraction/zipf.tex}
+      \end{center}
+    \end{marginparagraph}
+    In the expectation step, the binomial and Zipf distribution assumptions can be combined using Bayes' theorem to estimate whether a fact is correct or not.
+    In the maximization step, the parameters \(s_C\) and \(N_C\) are estimated.
+\end{description}
+
+\textcite{oie} compare their approach to KnowItAll, an earlier work similar to \textsc{oie} but needing a list of relations (surface forms) as input to define the target relation schema \(\relationSet\).
+On a set of ten relations, they manually labeled the extracted facts as correct or not, obtaining an error rate of 12\% for TextRunner and 18\% for KnowItAll.
+They further ran their model on 9 million web pages, extracting 7.8 million facts.
+
+A limitation of the \textsc{oie} approach is that it heavily depends on the raw surface form and generalizes poorly.
+The two facts ``Bletchley Park \textsl{known as} Station X'' and ``Bletchley Park \textsl{codenamed} Station X'' are considered different by TextRunner since the surface forms conveying the relations in the underlying sentences are different.
+Subsequent \textsc{oie} approaches try to address this problem, such as \textcite{textrunner_synonym}, which extend TextRunner with a resolver \parencite{textrunner_resolver} to merge synonyms.
+However, this problem has not been fully overcome yet and is still an active area of research.
+Furthermore, since the input of \textsc{oie} systems is often taken to be the largest possible chunk of the web, and since the extracted facts do not follow a strict nomenclature, a fair comparison of \textsc{oie} systems among themselves or against other unsupervised relation extraction models is still not feasible.
+
+\subsection{Clustering Surface Forms}
+\label{sec:relation extraction:hasegawa}
+The first unsupervised relation extraction model was the clustering approach of \textcitex{hasegawa}.
+It is somewhat similar to \textsc{dirt} (Section~\ref{sec:relation extraction:dirt}) in that it uses a similarity between samples.
+However, their work goes one step further by using this similarity to build relation classes.
+Furthermore, \textcite{hasegawa} do not assume \hypothesis{pullback}, i.e.~they do not assume that the sentence and entities convey the relation separately, on their own.
+Instead, their basic assumption is that the infix between two entities is the expression of the conveyed relation.
+\begin{marginparagraph}
+  As a reminder, the infix is the span of text between the two entities in the sentence.
+\end{marginparagraph}
+As such, if two infixes are similar, the sentences convey similar relations.
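+To make this concrete, the following minimal sketch extracts the infix of a sample; it assumes that entity mentions are given as character offsets, and the names are purely illustrative:
+\begin{verbatim}
+# Minimal sketch: extract the infix of a sample, assuming entity
+# mentions are given as (start, end) character offsets.
+def infix(sentence, e1_span, e2_span):
+    """Return the span of text strictly between the two entity mentions."""
+    (_, end_first), (start_second, _) = sorted([e1_span, e2_span])
+    return sentence[end_first:start_second].strip()
+
+# The infix below is "was born in", a surface form of the conveyed relation.
+infix("Gustave Flaubert was born in Rouen.", (0, 16), (29, 34))
+\end{verbatim}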
+Furthermore, \textsc{ner} (see the introduction of Chapter~\ref{chap:relation extraction}) is performed on the text instead of simple entity chunking.
+This means that all entities are tagged with a type such as ``organization'' or ``person.''
+These types strongly constrain the relations through the following assumption:
+\begin{marginparagraph}[-9mm]
+  Following Section~\ref{sec:context:relation algebra}, \(\breve{r}\) is the converse relation of \(r\), i.e.~the relation with \(e_1\) and \(e_2\) in the reverse order.
+  \(\relationComposition\) is the composition operator and \(\relationOne_X\) the complete relation over \(X\).
+  \(r\relationComposition\breve{r}\) is the relation linking all the entities which appear as subject (\(e_1\), on the left-hand side) of \(r\) to themselves.
+  This relation is constrained to be between entities in \(X\).
+  Less relevant to this formula, \(r\relationComposition\breve{r}\) also links together entities linked by \(r\) to the same object.
+\end{marginparagraph}
+\begin{assumption}{type}
+  All entities have a unique type, and all relations are left and right restricted to one of these types.
+
+  \smallskip
+  \noindent
+  \(
+    \exists \symcal{T} \textup{ partition of } \entitySet :
+    \forall r\in\relationSet :
+    \exists X, Y\in \symcal{T} :
+    r\relationComposition \breve{r} \relationOr \relationOne_X = \relationOne_X
+    \; \land \;
+    \breve{r}\relationComposition r \relationOr \relationOne_Y = \relationOne_Y
+  \)
+\end{assumption}
+\begin{marginparagraph}[9mm]
+  Here, we assume that the partition \(\symcal{T}\) is not degenerate and somewhat looks like a standard \textsc{ner} classification output.
+  Otherwise, \(\symcal{T}=\{\entitySet\}\) is a valid partition of \(\entitySet\), and this assumption is tautological.
+\end{marginparagraph}
+This is a natural assumption for many relations; for example, the relation \textsl{born in} is always between a person and a geopolitical entity (\textsc{gpe}).
+
+Given a pair of entities \((e_1, e_2)\in\entitySet^2\), \textcite{hasegawa} collect all samples in which they appear and extract a single vector representation from all these samples.
+This representation is built from the bag of words of the infixes weighted by \textsc{tf--idf} (term frequency--inverse document frequency).
+Since a bag of words discards the ordering of the words or entities, the variant of \textsc{tf--idf} used takes the directionality into account:
+\begin{align*}
+  \textsc{tf}(w, e_1, e_2) = &
+    \text{number of times \(w\) appears between \(e_1\) and \(e_2\)} \\
+    & \hspace{5mm} - \text{number of times \(w\) appears between \(e_2\) and \(e_1\)} \\
+  \textsc{idf}(w) = & (\text{number of documents in which \(w\) appears})^{-1}
+  \\
+  \textsc{tf--idf}(w, e_1, e_2) = & \textsc{tf}(w, e_1, e_2) \cdot \textsc{idf}(w)
+\end{align*}
+
+From this definition, we obtain a representation \(\vctr{z}_{e_1, e_2}\in\symbb{R}^{V}\) of the pair \((e_1, e_2)\in\entitySet^2\) by taking the value of \(\textsc{tf--idf}(w, e_1, e_2)\) for all \(w\in V\).
+Given two entity pairs, their similarity is defined as follows:
+\begin{equation*}
+  \operatorname{sim}(\vctr{e}, \vctr{e}')
+  = \cos(\vctr{z}_\vctr{e}, \vctr{z}_\vctr{e'})
+  = \frac{\vctr{z}_\vctr{e} \cdot \vctr{z}_\vctr{e'}}{\|\vctr{z}_\vctr{e}\| \|\vctr{z}_\vctr{e'}\|}.
+\end{equation*}
+
+Using this similarity function, the complete-linkage clustering algorithm%
+\sidenote{
+  The complete-linkage algorithm is an agglomerative hierarchical clustering method also called farthest neighbor clustering.
+  The algorithm starts with each sample in its own cluster, then merges the clusters two by two until reaching the desired number of clusters.
+  At each step, the two closest clusters are merged together, with the distance between clusters being defined as the distance between their farthest elements.
+}
+\parencite{complete_linkage} is used to extract relation classes.
+Since each pair ends up in a single cluster, this assumes \hypothesis{1-adjacency}.
+\Textcite{hasegawa} evaluate their method on articles from the New York Times (\textsc{nyt}).
+They extract relation classes by first clustering all \(\vctr{z}_{e_1, e_2}\) where \(e_1\) has the type person and \(e_2\) has the type \textsc{gpe}, and then by clustering all \(\vctr{z}_{e_1, e_2}\) where both \(e_1\) and \(e_2\) are organizations.
+By clustering different type combinations separately, they ensure that \hypothesis{type} is enforced.
+
+They furthermore experiment with automatic labeling of the clusters with the most frequent word appearing in the samples.
+Apart from the relation \textsl{prime minister}, which is simply labeled ``minister'' since only unigrams are considered, the labels are rather on point.
+To measure the performance of their model, they use a classical supervised \fone{} where each cluster is labeled by the majority gold relation.
+Using this somewhat ill-suited metric, they reach an \fone{} of 82\% on person--\textsc{gpe} pairs and an \fone{} of 77\% on organization--organization pairs.
+This relatively high score compared to subsequent models can be explained by the small size of their dataset, which is further split by entity type.
+Furthermore, note that some generic relations such as \textsl{part of} do not follow \hypothesis{type} and, as such, cannot be captured.
+
+\subsection{Rel-\textsc{lda}}
+\label{sec:relation extraction:rellda}
+Rel-\textsc{lda} \parencitex{rellda} is a probabilistic generative model inspired by \textsc{lda}.
+It works by clustering sentences: each relation defines a distribution over a handcrafted set of sentence features (Section~\ref{sec:relation extraction:hand-designed features}) describing the relationship between the two entities in the text.
+Furthermore, rel-\textsc{lda} models the propensity of a relation at the level of the document; thus, it is not strictly speaking a sentence-level relation extractor.
+The idea behind modeling this additional information is that when a relation such as \wdrel{413} \textsl{position played on team} appears in a document, other relations pertaining to sports are more likely to appear.
+Figure~\ref{fig:relation extraction:rellda plate} gives the plate diagram for the rel-\textsc{lda} model.
+It uses the following variables:
+\begin{itemize}[nosep,label={}]
+  \item \(\rndmvctr{f}_i\) the features of the \(i\)-th sample, where \(\rndm{f}_{ij}\) is its \(j\)-th feature
+  \item \(\rndm{r}_i\) the relation of the \(i\)-th sample
+  \item \(\rndm{\theta}_d\) the distribution of relations in the document \(d\)
+  \item \(\rndm{\phi}_{rj}\) the distribution of the \(j\)-th feature for the relation \(r\)
+  \item \(\alpha\) the Dirichlet prior for \(\rndm{\theta}_d\)
+  \item \(\beta\) the Dirichlet prior for \(\rndm{\phi}_{rj}\)
+\end{itemize}
+The generative process is listed as Algorithm~\ref{alg:relation extraction:rellda generation} and sketched in code below.
+The learning process uses the expectation--maximization algorithm.
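+To make the generative story concrete, here is a minimal sketch of it (a toy illustration only; the dimensions and variable names are assumptions of this sketch, and a single feature vocabulary is shared by all feature slots for simplicity):
+\begin{verbatim}
+import numpy as np
+
+# Toy dimensions: R relations, m feature slots, V values per feature,
+# D documents, each with n_d samples.
+R, m, V, D, n_d = 10, 3, 50, 5, 20
+alpha, beta = 0.1, 0.1
+rng = np.random.default_rng(0)
+
+# One feature prior phi[r, j] per relation r and feature slot j.
+phi = rng.dirichlet(np.full(V, beta), size=(R, m))
+
+corpus = []
+for d in range(D):
+    theta_d = rng.dirichlet(np.full(R, alpha))  # relation propensities of document d
+    for i in range(n_d):
+        r_i = rng.choice(R, p=theta_d)          # relation of the i-th sample
+        f_i = [rng.choice(V, p=phi[r_i, j]) for j in range(m)]  # its m features
+        corpus.append((d, r_i, f_i))
+\end{verbatim}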
+In the variational E-step, the relation for each sample \(r_i\) is sampled from the categorical distribution:
+\begin{equation*}
+  P(r_i\mid \vctr{f}_i, d) \propto P(r_i\mid d) \prod_{j=1}^m P(f_{ij}\mid r_i)
+\end{equation*}
+\begin{marginfigure}
+  \centering
+  \input{mainmatter/relation extraction/rellda plate.tex}
+  \scaption[Rel-\textsc{lda} plate diagram.]{
+    Rel-\textsc{lda} plate diagram.
+    \(D\) is the number of documents in the dataset and \(n_d\) is the number of samples in the document \(d\).
+    For each sample \(i\), there are several features \(\rndm{f}_{i1}, \rndm{f}_{i2}, \dotsc, \rndm{f}_{im}\); accordingly, for each relation \(r\), there are also several feature priors \(\rndm{\phi}_{r1}, \dotsc, \rndm{\phi}_{rm}\).
+    However, for simplicity, a single prior is shown here.
+    \label{fig:relation extraction:rellda plate}
+  }
+\end{marginfigure}%
+\begin{marginalgorithm}
+  \centering
+  \input{mainmatter/relation extraction/rellda.tex}
+  \scaption[The rel-\textsc{lda} generative process.]{
+    The rel-\textsc{lda} generative process.
+    \(\operatorname{Dir}\) are Dirichlet distributions.
+    \(\operatorname{Cat}\) are categorical distributions.
+    \label{alg:relation extraction:rellda generation}
+  }
+\end{marginalgorithm}%
+where \(P(r\mid d)\) is defined by \(\theta_d\) and \(P(f_{ij}\mid r)\) is defined by \(\phi_{rj}\).
+In the M-step, the values of \(\theta_d\) are computed from the number of times each relation appears in \(d\) and from the hyperprior \(\alpha\); and the values of \(\phi_{rj}\) are computed from the number of co-occurrences of the \(j\)-th feature with the relation \(r\) and from \(\beta\).
+
+\Textcite{rellda} evaluate their model on the New York Times by comparing their clusters to relations in Freebase.
+However, because of the incompleteness of knowledge bases, they only evaluate the recall on Freebase and use manual annotation to estimate the precision.
+Even though the original article lacks a significant comparison, subsequent approaches often compare to rel-\textsc{lda}.
+
+A first limitation of their approach is that, given the relation \(r\), the features \(f\) are independent.
+Since the entities are among those features, this means that \(P(e_2\mid e_1, r) = P(e_2\mid r)\), which is clearly false.
+\begin{assumption}{biclique}
+  Given a relation, the entities are independent of one another: \( \displaystyle \rndm{e}_1 \independent \rndm{e}_2 \mid \rndm{r} \).
+  In other words, given a relation, all possible head entities are connected to all possible tail entities.
+
+  \smallskip
+  \noindent
+  \(\forall r\in\relationSet:\exists A,B\subseteq\entitySet: r\relationComposition\breve{r}=\relationOne_A\land\breve{r}\relationComposition r=\relationOne_B\)
+\end{assumption}
+This is a widespread problem with generative models, which are inclined to make extensive independence assumptions.
+Furthermore, generative models have an implicit bias that all observed features are related to relation extraction, even though they might measure other aspects of the sample (style, idiolectal word choice, etc.).
+This might result in the model focusing on features not related to the relation extraction task.
+
+Several extensions of rel-\textsc{lda} were proposed.
+Type-\textsc{lda} \parencite{rellda} proposes to model entity types as latent variables generating the entity features, the types themselves being generated from the relation variable \(r\), thus softly enforcing \hypothesis{type}.
+Sense-\textsc{lda} \parencitex{rellda_sense} uses an \textsc{lda}-like model for each different dependency path.
+Clusters for different paths are then merged into relation clusters.
+
+Rel-\textsc{lda} is an important work in that it proposes a simple evaluation framework; in particular, it introduces the \bcubed{} metric to unsupervised relation extraction.
+However, it predates the advent of neural networks and distributed representations in relation extraction, by which it was bound to be replaced.
+
+\subsection{Variational Autoencoder for Relation Extraction}
+\label{sec:relation extraction:vae}
+\Textcitex{vae_re} were the first to propose a discriminative unsupervised relation extraction model.
+Discriminative models directly solve the inference problem of finding the posterior \(P(r\mid x)\).
+This is in contrast to generative models such as rel-\textsc{lda}, which determine \(P(x\mid r)\) and then use Bayes' theorem to compute \(P(r\mid x)\) and make a prediction.
+The model of \textcite{vae_re} is closely related to the approach presented in Chapter~\ref{chap:fitb}.
+It is a clustering model, meaning that it produces clusters of samples in which all samples convey the same relation.
+To do so, it uses a variational autoencoder model (\textsc{vae}, \citex{vae}) that we now describe.
+
+\paragraph{Variational Autoencoder}
+\begin{marginfigure}
+  \centering
+  \input{mainmatter/relation extraction/vae plate.tex}
+  \scaption[\textsc{vae} plate diagram.]{
+    \textsc{vae} plate diagram.
+    \(N\) is the number of samples in the dataset.
+    \label{fig:relation extraction:vae plate}
+  }
+\end{marginfigure}
+The goal of a variational autoencoder is to learn a latent variable \(\vctr{z}\) which explains the distribution of an observed variable \(\vctr{x}\).
+For our problem, the latent variable corresponds to the relation conveyed by the sample \(\vctr{x}\).
+We assume we know the generative process \(P(\vctr{x}\mid \vctr{z}; \vctr{\theta})\), i.e.~this process is the ``decoder'' (parametrized by \(\vctr{\theta}\)): given the latent variable, it produces a sample.
+However, the process of interest to us is to estimate the latent variable---the relation---from a sample, that is \(P(\vctr{z}\mid\vctr{x}; \vctr{\theta})\).
+Using Bayes' theorem, we can reformulate this posterior as \(P(\vctr{x}\mid \vctr{z}; \vctr{\theta})P(\vctr{z}\mid \vctr{\theta}) \divslash P(\vctr{x}\mid \vctr{\theta})\).
+However, computing \(P(\vctr{x}\mid \vctr{\theta})\) is often intractable, especially when the likelihood \(P(\vctr{x}\mid \vctr{z}; \vctr{\theta})\) is modeled using a complicated function like a neural network.
+To solve this problem, a variational approach is used: another model \(Q\), parametrized by \(\vctr{\phi}\), is used to approximate \(P(\vctr{z}\mid\vctr{x}; \vctr{\theta})\) as well as possible.
+This approximation \(Q(\vctr{z}\mid\vctr{x};\vctr{\phi})\) is the ``encoder'' since it finds the latent variable associated with a sample.
+The model can then be trained by maximizing the log-likelihood given the latent variable estimated by \(Q\) and by minimizing the difference between the latent variable predicted by \(Q\) and the desired prior \(P(\vctr{z}\mid\vctr{\theta})\):
+\begin{equation}
+  J_\textsc{elbo}(\vctr{\theta}, \vctr{\phi}) = \expectation_{Q(\vctr{z}\mid \vctr{x}; \vctr{\phi})}[\log P(\vctr{x}\mid\vctr{z};\vctr{\theta})] - \kl(Q(\vctr{z}\mid \vctr{x}; \vctr{\phi}) \mathrel{\|} P(\vctr{z}\mid\vctr{\theta}))
+  \label{eq:relation extraction:elbo}
+\end{equation}
+A justification for this objective can also be found in the fact that it is a lower bound of the log marginal likelihood \(\log P(\vctr{x}\mid \vctr{\theta})\), hence its name: evidence lower bound (\textsc{elbo}).
+The first part of the objective is often referred to as the negative reconstruction loss since it seeks to reconstruct the sample \(\vctr{x}\) after it went through the encoder \(Q\) and the decoder \(P\).
+One last problem with the \textsc{vae} approximation relates to the reconstruction loss: the estimation of the expectation over \(Q(\vctr{z}\mid\vctr{x};\vctr{\phi})\) is not differentiable, which makes the model---in particular \(\vctr{\phi}\)---untrainable by gradient descent.
+This is usually solved using the reparameterization trick: sampling from \(Q(\vctr{z}\mid\vctr{x};\vctr{\phi})\) can often be done in a two-step process, first sampling from a simple distribution like \(\epsilon\sim\normalDistribution(0, 1)\), then transforming this sample using a deterministic process parametrized by \(\vctr{\phi}\).
+The plate diagram of the \textsc{vae} is given in Figure~\ref{fig:relation extraction:vae plate}, where the model \(P\) is marked with solid lines and the variational approximation \(Q\) is marked with dashed lines.
+
+\begin{marginfigure}
+  \centering
+  \input{mainmatter/relation extraction/marcheggiani plate.tex}
+  \scaption[\textcite{vae_re} plate diagram.]{
+    \textcite{vae_re} plate diagram.
+    \label{fig:relation extraction:marcheggiani plate}
+  }
+\end{marginfigure}
+
+\bigskip
+
+Coming back to the model of \textcite{vae_re}, it is a conditional \(\beta\)\textsc{-vae},%
+\sidenote{
+  The \(\beta\) in ``\(\beta\)\textsc{-vae}'' simply indicates that the Kullback--Leibler term in Equation~\ref{eq:relation extraction:elbo} is weighted by a hyperparameter \(\beta\).
+  More details are given in Chapter~\ref{chap:fitb}.
+}
+i.e.~the whole process is conditioned on an additional variable.
+Indeed, in their approach, only the entities \(\vctr{e}\in\entitySet^2\) are reconstructed, while the sentence \(s\in\sentenceSet\) simply conditions the whole process.
+The latent variable explaining the observed entities is expected to be the relation conveyed by the sample.
+The resulting model's plate diagram is given in Figure~\ref{fig:relation extraction:marcheggiani plate}.
+This approach is defined by two models:
+\begin{description}
+  \item[The Encoder] \(Q(\rndm{r}\mid\vctr{e}, s; \vctr{\phi})\) is the relation extraction model proper.
+    It is defined as a linear model on top of handcrafted features (Section~\ref{sec:relation extraction:hand-designed features}).
+    For each sample, the model outputs a distribution over a predefined number of relations.
+  \item[The Decoder] \(P(\vctr{e}\mid r; \vctr{\theta})\) is a model estimating how likely it is for two entities to be linked by a relation.
+    It is a reconstruction model since the entities \(\vctr{e}\) are known and need to be retrieved from the latent relation \(r\) sampled from the encoder.
+    It is defined using selectional preferences (Section~\ref{sec:context:selectional preferences}) and \textsc{rescal} (Section~\ref{sec:context:rescal}).
+\end{description}
+Note that to label a sample \((\vctr{e}, s)\in\dataSet\), \textcite{vae_re} simply select \(\argmax_{r\in\relationSet} Q(r\mid \vctr{e}, s; \vctr{\phi})\), meaning that the decoder is not used during evaluation.
+Its sole purpose is to provide a supervision signal to the encoder through the maximization of \(J_\textsc{elbo}\).
+The whole autoencoder can also be interpreted as being trained on a surrogate task of filling in entity blanks.
+This is the interpretation we use in Chapter~\ref{chap:fitb}.
+
+For Equation~\ref{eq:relation extraction:elbo} to be well defined, a prior on the relations must also be selected; \textcite{vae_re} make the following assumption:
+\begin{assumption}{uniform}
+  All relations occur with equal frequency.
+
+  \smallskip
+  \noindent
+  \( \displaystyle \forall r\in\relationSet\colon P(r) = \frac{1}{|\relationSet|} \)
+\end{assumption}
+
+They evaluate their approach on the New York Times distantly supervised by Freebase.
+By inducing 100 clusters, they show an improvement of the \bcubed{} \fone{} compared to \textsc{dirt} (Section~\ref{sec:relation extraction:dirt}) and rel-\textsc{lda} (Section~\ref{sec:relation extraction:rellda}).
+They also experiment using semi-supervised evaluation (Section~\ref{sec:relation extraction:unsupervised evaluation}) by pre-training their decoder on a subset of Freebase before training their encoder as described above; this additional supervision improves the \fone{} by more than 27\%.
+These results were further improved by \textcite{vae_re2}, who proposed to split the latent variable into a relation \(r\) and sentence information \(z\), with \(z\) conditioned on \(r\), and to use a loss including the reconstruction of the sentence \(s\) from \(z\).
+
+\subsection{Matching the Blanks}
+\label{sec:relation extraction:mtb}
+Matching the blanks (\textsc{mtb}, \citex{mtb}) is an unsupervised method that does not attempt to cluster samples but rather learns a representation of the relational semantics they convey.
+More precisely, this representation is used to measure the similarity between samples such that similar samples convey similar relations.
+As such, it is either evaluated as a supervised pre-training method (Section~\ref{sec:relation extraction:unsupervised evaluation}) or using a few-shot dataset (Section~\ref{sec:relation extraction:few-shot}).
+The \textsc{mtb} article introduces several methods to extract an entity-aware representation of a sentence using \textsc{bert}; this was discussed in Section~\ref{sec:relation extraction:mtb sentential}.
+This section focuses on the unsupervised training.
+As a reminder, we refer to the sentence encoder of \textsc{mtb} as the function \(\bertcoder\colon\sentenceSet\to\symbb{R}^d\) illustrated in Figure~\ref{fig:relation extraction:emes}.
+Given this encoder, \textsc{mtb} defines the similarity between samples as:
+\begin{equation}
+  \operatorname{sim}(s, s') = \sigmoid(\bertcoder(s)\transpose\bertcoder(s'))
+  \label{eq:relation extraction:mtb similarity}
+\end{equation}
+This similarity function can be used to evaluate the model on a few-shot task.
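+As an illustration, the following minimal sketch shows how this similarity can be used for the few-shot scoring of Section~\ref{sec:relation extraction:few-shot}; it assumes a hypothetical \texttt{bertcoder} function mapping a sentence to a \(d\)-dimensional vector, and the names are purely illustrative:
+\begin{verbatim}
+import numpy as np
+
+def sigmoid(x):
+    return 1.0 / (1.0 + np.exp(-x))
+
+# Similarity between two sentences, assuming `bertcoder` maps a
+# sentence to a d-dimensional numpy vector.
+def sim(s, s_prime, bertcoder):
+    return sigmoid(bertcoder(s) @ bertcoder(s_prime))
+
+# Few-shot prediction: the chosen candidate is the one most similar to
+# the query; accuracy is the fraction of queries for which it is correct.
+def predict(query, candidates, bertcoder):
+    return max(candidates, key=lambda c: sim(query, c, bertcoder))
+\end{verbatim}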
+Note that this function completely ignores entity identifiers (e.g.\ \wdent{211539}), but can still exploit the entity surface forms (e.g.\ ``Peter Singer'') through the sentence \(s\in\sentenceSet\).
+This model can be used as is, without any training other than the masked language model pre-training of \textsc{bert} (Section~\ref{sec:context:mlm}), and reach an accuracy of 72.9\% on the FewRel 5-way 1-shot dataset.
+
+\Textcite{mtb} propose a training objective to fine-tune \textsc{bert} for the unsupervised relation extraction task.
+This objective is called matching the blanks.
+It assumes that two sentences containing the same entities convey the same relation.
+This is exactly \hypothesis{1-adjacency} as given in Section~\refAssumptionSection{oneadjacency}.
+The probability that two sentences convey the same relation (\(\rndm{D}=1\)) is taken from the similarity function: \(P(\rndm{D}=1\mid s, s')=\operatorname{sim}(s, s')\).
+Given this, the \hypothesis{1-adjacency} assumption is translated into the following negative sampling (Section~\ref{sec:context:negative sampling}) loss:
+\begin{equation}
+  \loss{mtb} = \frac{-1}{|\dataSet|^2} \sum_{\substack{(\vctr{e}, s)\in\dataSet\\(\vctr{e}', s')\in\dataSet}}
+  \begin{array}[t]{l}
+    \delta_{\vctr{e},\vctr{e}'} \log P(\rndm{D}=1\mid s, s') \\
+    \hspace{1cm} + (1 - \delta_{\vctr{e},\vctr{e}'}) \log P(\rndm{D}=0\mid s, s') \\
+  \end{array}
+  \label{eq:relation extraction:mtb loss}
+\end{equation}
+This loss is minimized through gradient descent by sampling random positive and negative sentence pairs.
+These pairs can be obtained by comparing the entity identifiers, without the need for any supervision.
+
+A problem with this approach is that the \bertcoder{} model can simply learn to perform entity linking on the entity surface forms in the sentences \(s\), thus minimizing Equation~\ref{eq:relation extraction:mtb loss} by predicting whether \(\vctr{e}=\vctr{e}'\).
+We want to avoid this behavior since it would only work on samples seen during training and would not generalize to unseen entities.
+To ensure that the model predicts whether the samples convey the same relation from the sentences \(s\) and \(s'\) alone, blanks are introduced.
+A special token \blanktag{} is substituted for the entities as follows:
+\begin{indentedexample}
+  \uhead{\blanktag}, inspired by Cale's earlier cover, recorded one of the most acclaimed versions of ``\utail{\blanktag}.''\\
+  \smallskip
+  \uhead{\blanktag}'s rendition of ``\utail{\blanktag}'' has been called ``one of the great songs'' by Time\dots
+\end{indentedexample}
+This is similar to the sample corruption of \textsc{bert} (Section~\ref{sec:context:mlm}); indeed, as in \textsc{bert}, the entity surface forms are blanked only a fraction%
+\sidenote{\Textcite{mtb} blank each entity with a probability of 70\%, meaning that only 9\% of training samples have both of their entity surface forms intact.}
+of the time so as not to confuse the model when real entities appear during evaluation.
+
+Another problem with Equation~\ref{eq:relation extraction:mtb loss} is that the negative sample space \(\vctr{e}\neq\vctr{e}'\) is extremely large.
+Instead of taking negative samples randomly in this space, \textcite{mtb} propose to take only samples which are likely to be close to positive ones.
+To this end, the \(\vctr{e}\neq\vctr{e}'\) condition is actually replaced with the following one:
+\begin{equation*}
+  |\{e_1, e_2\} \cap \{e_1', e_2'\}| = 1
+\end{equation*}
+These are called ``strong negatives'': negative samples that have precisely one entity in common.
+Negative sampling, especially with strong negatives, leads to another unfortunate assumption:
+\begin{assumption}[onetoone]{\(1\to1\)}
+  All relations are one-to-one.
+
+  \smallskip
+  \noindent
+  \( \forall r\in\relationSet\colon
+    r\relationComposition \breve{r} \relationOr \relationIdentity
+    = \breve{r}\relationComposition r \relationOr \relationIdentity
+    = \relationIdentity \)
+\end{assumption}
+Indeed, if a relation is not one-to-one, then there exist two facts \tripletHolds{e_1}{r}{e_2} and \tripletHolds{e_1}{r}{e_3} (or respectively with \(\breve{r}\)); however, these two facts form a strong negative pair; therefore, as per \loss{mtb}, their representations must be pulled away from one another.
+
+Despite these assumptions, \textsc{mtb} showcases impressive results, both as a few-shot method and as a supervised pre-training method.
+It obtained state-of-the-art results both on the SemEval 2010 Task 8 dataset with a macro-\(\overHalfdirected{\fone}\) %
+\begin{marginparagraph}[-6mm]
+  As a reminder, \(\overHalfdirected{\fone}\) is the half-directed metric described in Section~\ref{sec:relation extraction:supervised evaluation}.
+  It is referred to as ``taking directionality into account'' in the SemEval dataset.
+\end{marginparagraph}
+of 82.7\% and on FewRel with an accuracy of 90.1\% on the 5-way 1-shot task.
+
+\subsection{Self\textsc{ore}}
+\label{sec:relation extraction:selfore}
+Self\textsc{ore} \parencitex{selfore} is a clustering approach similar to the one of \textcite{hasegawa} presented in Section~\ref{sec:relation extraction:hasegawa}, but using deep neural network models for extracting sentence representations and for grouping these representations into relation clusters.
+Since they follow the experimental setup of \textcite{fitb}, which we present in Chapter~\ref{chap:fitb}, their results are listed in that chapter.
+
+Self\textsc{ore} uses \textsc{mtb}'s entity markers--entity start \bertcoder{} sentence representation.
+A clustering algorithm could be run to produce relation classes from these representations à la \textcite{hasegawa}.
+However, \textcite{selfore} introduce an iterative scheme to purify the clusters.
+This scheme is illustrated in Figure~\ref{fig:relation extraction:selfore} and works by alternately optimizing two losses, \loss{ac} and \loss{rc}.
+
+The first loss, \loss{ac}, is the clustering loss, which comes from \textsc{dec} \parencitex{dec}.
+\textsc{dec} is a deep clustering algorithm that uses a denoising autoencoder \parencite{dae} to compress the input.
+In their case, the input \(\vctr{h}\) is the sentence encoded by \bertcoder{}.
+The denoising autoencoder is trained layer by layer with a small bottleneck which produces a compressed representation of the sentence \(\vctr{z}=\operatorname{Encoder}(\vctr{h})\).
+This is the space in which the clustering occurs.
+For each cluster \(j=1,\dotsc,K\), a centroid%
+\sidenote{
+  The \(k\)-means clustering algorithm is used to initialize the centroids.
+  In practice, the \(k\)-means clusters could directly be used as soft labels.
+  However, \textcite{selfore} show that this underperforms compared to refining the clusters with \loss{ac}.
+}
+\(\vctr{\mu}_j\) is learned such that a sentence is part of the cluster whose centroid is the closest to its compressed representation.
+This is modeled with a Student's \(t\)-distribution with one degree of freedom centered around the centroid:
+\begin{marginfigure}
+  \centering
+  \input{mainmatter/relation extraction/selfore.tex}
+  \scaption[Self\textsc{ore} iterative algorithm.]{
+    Self\textsc{ore} iterative algorithm.
+    \label{fig:relation extraction:selfore}
+  }
+\end{marginfigure}
+\begin{equation*}
+  q_{ij} = \frac{(1+\|\vctr{z}_i-\vctr{\mu}_j\|^2)^{-1}}{\sum_k (1+\|\vctr{z}_i-\vctr{\mu}_k\|^2)^{-1}}
+\end{equation*}
+To force the initial clusters to be more distinct, a target distribution \(p\) is defined as:
+\begin{equation}
+  p_{ij} = \frac{q_{ij}^2 \divslash f_j}{\sum_k q_{ik}^2 \divslash f_k}
+  \label{eq:relation extraction:selfore target}
+\end{equation}
+where \(f_j=\sum_i q_{ij}\) are soft cluster frequencies.
+To push \(\mtrx{Q}\) towards \(\mtrx{P}\), a Kullback--Leibler divergence is used:
+\begin{equation*}
+  \loss{ac} = \kl(\mtrx{P}\mathrel{\|}\mtrx{Q}) = \sum_{i=1}^{|\dataSet|} \sum_{j=1}^K p_{ij} \log \frac{p_{ij}}{q_{ij}}
+\end{equation*}
+This loss is minimized by backpropagating to the cluster centroids \(\vctr{\mu}_j\) and to the encoder's parameters in the \textsc{dae}.
+Note that the decoder of the \textsc{dae} is only used for initializing the encoder such that the input can be reconstructed.
+
+Optimizing \loss{ac} is the first step of Self\textsc{ore}; it assigns a pseudo-label to each sample in the dataset.
+The second step is to train a classifier to predict these pseudo-labels.
+The classifier is a simple multi-layer perceptron trained with the usual cross-entropy classification loss, which is called \loss{rc} in Self\textsc{ore}.
+This loss also backpropagates to the \bertcoder{}, thus changing the sentence representations \(\vctr{h}\).
+Self\textsc{ore} is an iterative algorithm: changing the \(\vctr{h}\) modifies the clustering found by \textsc{dec}.
+Thus, the two steps, clustering and classification, are repeated several times until a stable label assignment is found.
+
+The central assumption of Self\textsc{ore} is that \bertcoder{} already produces a good representation for relation extraction, which, as we saw with the non-fine-tuned \bertcoder{} score on FewRel in Section~\ref{sec:relation extraction:mtb}, is rather accurate.
+However, Self\textsc{ore} also assumes \hypothesis{uniform}, i.e.~that all relations appear with the same frequency.
+This assumption is enforced by \loss{ac}, through the normalization of the target distribution \(\mtrx{P}\) by the soft cluster frequencies \(f_j\).%
+\sidenote{For further details, \textcite{dec} contains an analysis of the \textsc{dec} clustering algorithm on imbalanced \textsc{mnist} data.}
+Indeed, the distribution \(\mtrx{P}\) is a more concentrated (because of the square) and more uniform (because of the normalization by \(f_j\)) version of the original distribution \(\mtrx{Q}\).
+
+The interpretation of the concentration effect in terms of modeling hypotheses is more complex.
+The variable \(\vctr{h}\) is the concatenation of the two entity embeddings.
+Let us break down the \bertcoder{} function into two components: \(\operatorname{ctx}_1(s)\) and \(\operatorname{ctx}_2(s)\).
+These are simply the two contextualized embeddings of \texttt{<e1>} and \texttt{<e2>} (Section~\ref{sec:relation extraction:mtb}); in other words, the function \(\operatorname{ctx}\) contextualizes an entity surface form inside its sentence.
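+A minimal sketch of this decomposition follows; the \texttt{contextual\_embeddings} function, which would return one vector per token of the sentence, and the marker positions are assumptions of this illustration:
+\begin{verbatim}
+import numpy as np
+
+# ctx_i(s): the contextualized embedding at the i-th entity start marker.
+def ctx(s, marker_position, contextual_embeddings):
+    return contextual_embeddings(s)[marker_position]
+
+# h: concatenation of the two contextualized entity embeddings.
+def h(s, e1_pos, e2_pos, contextual_embeddings):
+    return np.concatenate([ctx(s, e1_pos, contextual_embeddings),
+                           ctx(s, e2_pos, contextual_embeddings)])
+\end{verbatim}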
+When two sentence representations \(\vctr{h}\) and \(\vctr{h}'\) are close, their pseudo-labels tend to be the same, and thus their relations also tend to be the same.
+In other words:
+\begin{assumption}[ctxoneadjacency]{\ctxoneadj}
+  Two samples with the same contextualized representation of their entities' surface forms convey the same relation.
+
+  \smallskip
+  \noindent
+  \( \forall (s, \vctr{e}, r), (s', \vctr{e}', r')\in\dataSet_\relationSet\colon \)\\
+  \null\hfill\( \operatorname{ctx}_1(s)=\operatorname{ctx}_1(s') \land \operatorname{ctx}_2(s)=\operatorname{ctx}_2(s') \implies r=r' \)
+\end{assumption}
+If we assume that \bertcoder{} only performs entity linking of the entity surface forms, then \(\operatorname{ctx}_i(s)=e_i\) for \(i=1,2\); in this case, \hypothesis{\ctxoneadj} collapses to \hypothesis{1-adjacency}, and the contextualization inside the sentence \(s\) is ignored.
+On the other hand, if we assume that \bertcoder{} provides no information about the entities and only encodes the sentence, then \(\operatorname{ctx}_i(s)=s\) for \(i=1,2\) and \hypothesis{\ctxoneadj} only states that the entity identifiers \(\vctr{e}\in\entitySet^2\) should have no influence on the relation.
+The effective repercussion of \hypothesis{\ctxoneadj} lies somewhere halfway between these two extremes.
diff --git a/mainmatter/relation extraction/vae plate.tex b/mainmatter/relation extraction/vae plate.tex
@@ -0,0 +1,17 @@
+\begin{tikzpicture}[node distance=7mm]
+  \node[pdiag observed] (x) {\(\rndmvctr{x}\)};
+  \node[pdiag latent, above=8mm of x] (z) {\(\rndmvctr{z}\)};
+  \node[inner sep=1mm, left=of z] (phi) {\(\vctr{\phi}\)};
+  \node[inner sep=1mm, right=of x] (theta) {\(\vctr{\theta}\)};
+  \draw[arrow] (z) -- (x);
+  \draw[arrow] (theta) -- (x);
+  \draw[arrow] (theta) -- (z);
+  \draw[arrow, dashed] (x) to[out=135, in=-135] (z);
+  \draw[arrow, dashed] (phi) -- (z);
+
+  \coordinate (plspace1) at ($(x.south) + (0, -2mm)$);
+  \coordinate (plspace2) at ($(x.east) + (3mm, 0)$);
+  \coordinate (plspace3) at ($(x.west) + (-3mm, 0)$);
+  \node[pdiag plate, inner sep=1mm, fit=(x) (z) (plspace1) (plspace2) (plspace3)] (p) {};
+  \node[anchor=south east] at (p.south east) {\(N\)};
+\end{tikzpicture}
diff --git a/mainmatter/relation extraction/zipf.tex b/mainmatter/relation extraction/zipf.tex
@@ -0,0 +1,7 @@
+\begin{tikzpicture}
+  \begin{loglogaxis}[modern, width=45mm, ymax=1, xtick={1,10}, minor xtick={2,...,9}]
+    \addplot+[black, mark options={fill=black}, mark size=0.4mm, samples at={1,...,10}]
+      {x^(-2)/1.5497677311665408};
+    % \sum_{i=1}^{10} i^{-2} = 1.5497677311665408
+  \end{loglogaxis}
+\end{tikzpicture}
diff --git a/thesis.bib b/thesis.bib
@@ -0,0 +1,2343 @@
+@article{ace_evaluation,
+  title = {The automatic content extraction (\textsc{ace}) program-tasks, data, and evaluation.},
+  author = {Doddington, George R and Mitchell, Alexis and Przybocki, Mark A and Ramshaw, Lance A and Strassel, Stephanie M and Weischedel, Ralph M},
+  booktitle = {lrec},
+  volume = {2},
+  number = {1},
+  pages = {837--840},
+  year = {2004},
+  shortseries = {\textsc{lrec}},
+  url = {https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/lrec2004-ace-program.pdf}
+}
+
+@inbook{age_du_capitaine,
+  title = {Lettre du 16 mai 1843 à sa sœur},
+  booktitle = {Correspondance de Gustave Flaubert},
+  author = {Louis Conard},
+  year = {1926},
+  volume = {1},
+  pages = {139--140}
+}
+
+@article{ari,
+  title = {Comparing partitions},
+  author = {Lawrence Hubert and Phipps Arabie},
+  journal = {Journal of classification},
+  volume = {2},
+  number =
{1}, + pages = {193--218}, + year = {1985}, + month = {12}, + doi = {10.1007/BF01908075}, + issn = {1432-1343}, + shortseries = {\textsc{joc}}, + publisher = {Springer}, + url = {https://link.springer.com/content/pdf/10.1007/BF01908075.pdf} +} + +@inbook{assisted_curation, + author = { Beatrice Alex and Claire Grover and Barry Haddow and Mijail Kabadjov and Ewan Klein and Michael Matthews and Stuart Roebuck and Richard Tobin and Xinglong Wang}, + title = {Assisted curation: does text mining really help?}, + year = {2008}, + booktitle = {Pacific Symposium on Biocomputing}, + volume = {13}, + pages = {556--567}, + url = {https://psb.stanford.edu/psb-online/proceedings/psb08/alex.pdf}, + shortseries = {\textsc{psb}} +} + +@inproceedings{attention, + author = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua}, + editor = {Bengio, Yoshua and LeCun, Yann}, + title = {Neural Machine Translation by Jointly Learning to Align and Translate}, + booktitle = {3rd International Conference on Learning Representations (\textsc{iclr}), Conference Track Proceedings}, + eventdate = {2015-05-07/2015-05-09}, + location = {San Diego, \textsc{ca}, \textsc{usa}}, + shortseries = {\textsc{iclr}}, + year = {2015}, + url = {http://arxiv.org/abs/1409.0473} +} + +@inproceedings{bert, + title = {\textsc{bert}: Pre-training of Deep Bidirectional Transformers for Language Understanding}, + author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, + booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)}, + shortseries = {\textsc{naacl}}, + month = {6}, + year = {2019}, + address = {Minneapolis, Minnesota}, + publisher = {Association for Computational Linguistics}, + url = {https://www.aclweb.org/anthology/N19-1423}, + doi = {10.18653/v1/N19-1423}, + pages = {4171--4186}, +} + +@inproceedings{beta_vae, + title = {\(\beta\)-\textsc{vae}: Learning Basic Visual Concepts with a Constrained Variational Framework}, + author = {Higgins, Irina and Matthey, Loic and Pal, Arka and Burgess, Christopher and Glorot, Xavier and Botvinick, Matthew and Mohamed, Shakir and Lerchner, Alexander}, + booktitle = {International Conference on Learning Representations}, + year = {2017}, + shortseries = {\textsc{iclr}}, + url = {https://openreview.net/forum?id=Sy2fzU9gl} +} + +@inproceedings{bcubed, + title = {Entity-Based Cross-Document Coreferencing Using the Vector Space Model}, + author = {Amit Bagga and Breck Baldwin}, + booktitle = {36th Annual Meeting of the Association for Computational Linguistics and 17th International Conference on Computational Linguistics, Volume 1}, + month = {8}, + year = {1998}, + address = {Montreal, Quebec, Canada}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P98-1012}, + doi = {10.3115/980845.980859}, + shortseries = {\textsc{acl}}, + pages = {79--85} +} + +@misc{binding_symbolic, + title={On the Binding Problem in Artificial Neural Networks}, + author={Klaus Greff and Sjoerd van Steenkiste and Jürgen Schmidhuber}, + year={2020}, + eprint={2012.05208}, + archivePrefix={arXiv}, + primaryClass={cs.NE} +} + +@article{biobert, + author = {Lee, Jinhyuk and Yoon, Wonjin and Kim, Sungdong and Kim, Donghyeon and Kim, Sunkyu and So, Chan Ho and Kang, Jaewoo}, + title = {Bio\textsc{bert}: a pre-trained biomedical language representation model for biomedical text mining}, + journal = {Bioinformatics}, + 
volume = {36}, + number = {4}, + pages = {1234--1240}, + year = {2019}, + month = {09}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btz682}, + url = {https://academic.oup.com/bioinformatics/article-pdf/36/4/1234/32527770/btz682.pdf} +} + +@inproceedings{blogcatalog, + author = {Tang, Lei and Liu, Huan}, + title = {Relational Learning via Latent Social Dimensions}, + year = {2009}, + isbn = {9781605584959}, + publisher = {Association for Computing Machinery}, + address = {New York, \textsc{ny}, \textsc{usa}}, + url = {https://dl.acm.org/doi/pdf/10.1145/1557019.1557109}, + doi = {10.1145/1557019.1557109}, + booktitle = {Proceedings of the 15th \textsc{acm} \textsc{sigkdd} International Conference on Knowledge Discovery and Data Mining}, + pages = {817--826}, + numpages = {10}, + location = {Paris, France}, + shortseries = {\textsc{kdd}}, + series = {\textsc{kdd} '09} +} + +@article{bpe, + title = {A new algorithm for data compression}, + author = {Gage, Philip}, + journal = {C Users Journal}, + volume = {12}, + number = {2}, + pages = {23--38}, + year = {1994}, + publisher = {McPherson, KS: R \& D Publications, c1987-1994.} +} + +@inproceedings{bpr, + author = {Steffen Rendle and Christoph Freudenthaler and Zeno Gantner and Lars Schmidt-Thieme}, + title = {\textsc{bpr}: Bayesian Personalized Ranking from Implicit Feedback}, + year = {2009}, + isbn = {9780974903958}, + publisher = {\textsc{auai} Press}, + address = {Arlington, Virginia, \textsc{usa}}, + booktitle = {Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence}, + pages = {452--461}, + numpages = {10}, + shortseries = {\textsc{uai}}, + location = {Montreal, Quebec, Canada}, + doi = {10.5555/1795114.1795167}, + url = {https://dl.acm.org/doi/pdf/10.5555/1795114.1795167}, +} + +@book{calvinandhobbes, + title = {Calvin and Hobbes}, + author = {Bill Watterson}, + date = {1992-05-17} +} + +@inbook{cat, + title = {{Cheshire Cat details from the Tree Above Alice}}, + booktitle = {The Nursery ``Alice''}, + author = {John Tenniel}, + year = {1889}, + addendum = {Via Wikimedia Commons}, + url = {https://commons.wikimedia.org/wiki/File:Tennel_Cheshire_proof.png} +} + +@inproceedings{charrnn, + author = {Sutskever, Ilya and Martens, James and Hinton, Geoffrey}, + title = {Generating Text with Recurrent Neural Networks}, + booktitle = {Proceedings of the 28th International Conference on Machine Learning (\textsc{icml}-11)}, + shortseries = {\textsc{icml}}, + year = {2011}, + editor = {Lise Getoor and Tobias Scheffer}, + location = {Bellevue, Washington, \textsc{usa}}, + isbn = {978-1-4503-0619-5}, + month = {6}, + publisher = {Association for Computing Machinery}, + address = {New York, \textsc{ny}, \textsc{usa}}, + pages = {1017--1024}, +} + +@inproceedings{chebnet, + author = {Defferrard, Michaël and Bresson, Xavier and Vandergheynst, Pierre}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett}, + publisher = {Curran Associates, Inc.}, + title = {Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering}, + url = {https://proceedings.neurips.cc/paper/2016/file/04df4d434d481c5bb723be1b6df1ee65-Paper.pdf}, + volume = {29}, + shortseries = {\textsc{n}eur\textsc{ips}}, + year = {2016} +} + +@article{chinese_ontology, + author = {Chris Fraser}, + journal = {Philosophy East and West}, + number = {4}, + pages = {420--456}, + publisher = {University of Hawai'i Press}, + title = {Language and Ontology in Early Chinese Thought}, + volume = {57}, + year = {2007}, + issn = {00318221, 15291898}, + url = {http://www.jstor.org/stable/20109423} +} + +@inproceedings{cnn_classification, + title = {Convolutional Neural Networks for Sentence Classification}, + author = {Kim, Yoon}, + booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (\textsc{emnlp})}, + shortseries = {\textsc{emnlp}}, + month = {10}, + year = {2014}, + address = {Doha, Qatar}, + publisher = {Association for Computational Linguistics}, + url = {https://www.aclweb.org/anthology/D14-1181}, + doi = {10.3115/v1/D14-1181}, + pages = {1746--1751}, +} + +@inproceedings{cnn_imagenet, + author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {F. Pereira and C. J. C. Burges and L. Bottou and K. Q. Weinberger}, + publisher = {Curran Associates, Inc.}, + title = {ImageNet Classification with Deep Convolutional Neural Networks}, + url = {https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf}, + volume = {25}, + year = {2012} +} + +@article{commonsense, + title = {Programs with common sense}, + author = {McCarthy, John}, + year = {1959}, + url = {http://www-formal.stanford.edu/jmc/mcc59/mcc59.html} +} + + +@article{complete_linkage, + title = {An efficient algorithm for a complete link method}, + author = {Daniel Defays}, + journal = {The Computer Journal}, + volume = {20}, + number = {4}, + pages = {364--366}, + year = {1977}, + publisher = {Oxford University Press} +} + +@inproceedings{concept_backprop, + title = {Learning distributed representations of concepts}, + author = {Hinton, Geoffrey E}, + booktitle = {Proceedings of the eighth annual conference of the cognitive science society}, + volume = {1}, + pages = {12}, + year = {1986}, + organization = {Amherst, \textsc{ma}, \textsc{usa}}, + url = {https://www.cs.toronto.edu/~hinton/absps/families.pdf} +} + + +@inproceedings{conditional_vae, + author = {Sohn, Kihyuk and Lee, Honglak and Yan, Xinchen}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {C. Cortes and N. Lawrence and D. Lee and M. Sugiyama and R. Garnett}, + publisher = {Curran Associates, Inc.}, + title = {Learning Structured Output Representation using Deep Conditional Generative Models}, + url = {https://proceedings.neurips.cc/paper/2015/file/8d55a249e6baa5c06772297520da2051-Paper.pdf}, + volume = {28}, + shortseries = {\textsc{n}eur\textsc{ips}}, + year = {2015} +} + +@online{constraints_design, + author = {Maciej Cegłowski}, + title = {Web Design: The First 100 Years}, + year = {2014}, + url = {https://idlewords.com/talks/web_design_first_100_years.htm} +} + +@inproceedings{cove, + author = {McCann, Bryan and Bradbury, James and Xiong, Caiming and Socher, Richard}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {I. Guyon and U. V. Luxburg and S. Bengio and H. 
Wallach and R. Fergus and S. Vishwanathan and R. Garnett}, + publisher = {Curran Associates, Inc.}, + title = {Learned in Translation: Contextualized Word Vectors}, + url = {https://proceedings.neurips.cc/paper/2017/file/20c86a628232a67e7bd46f76fba7ce12-Paper.pdf}, + volume = {30}, + year = {2017} +} + +@article{dae, + author = {Pascal Vincent and Hugo Larochelle and Isabelle Lajoie and Yoshua Bengio and Pierre-Antoine Manzagol}, + title = {Stacked Denoising Autoencoders: Learning Useful Representations in a Deep Network with a Local Denoising Criterion}, + journal = {Journal of Machine Learning Research}, + year = {2010}, + volume = {11}, + number = {110}, + pages = {3371--3408}, + url = {http://jmlr.org/papers/v11/vincent10a.html} +} + +@inproceedings{dbpedia, + author={Auer, Sören and Bizer, Christian and Kobilarov, Georgi and Lehmann, Jens and Cyganiak, Richard and Ives, Zachary}, + booktitle = {Proceedings of 6th International Semantic Web Conference, 2nd Asian Semantic Web Conference (\textsc{iswc}+\textsc{aswc} 2007)}, + doi = {10.1007/978-3-540-76298-0\_52}, + url = {http://iswc2007.semanticweb.org/papers/715.pdf}, + journal = {The Semantic Web}, + month = {11}, + pages = {722--735}, + title = {\textsc{db}pedia: A Nucleus for a Web of Open Data}, + year = {2008} +} + +@inproceedings{dbpedia_abstracts, + title = {\textsc{db}pedia Abstracts: A Large-Scale, Open, Multilingual \textsc{nlp} Training Corpus}, + author = {Brümmer, Martin and Dojchinovski, Milan and Hellmann, Sebastian}, + booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (\textsc{lrec}'16)}, + month = {5}, + year = {2016}, + address = {Portorož, Slovenia}, + publisher = {European Language Resources Association (\textsc{elra})}, + url = {https://aclanthology.org/L16-1532}, + pages = {3339--3343} +} + +@inproceedings{dec, + title = {Unsupervised Deep Embedding for Clustering Analysis}, + author = {Xie, Junyuan and Girshick, Ross and Farhadi, Ali}, + booktitle = {Proceedings of The 33rd International Conference on Machine Learning}, + pages = {478--487}, + year = {2016}, + editor = {Balcan, Maria Florina and Weinberger, Kilian Q.}, + volume = {48}, + series = {Proceedings of Machine Learning Research}, + address = {New York, New York, \textsc{usa}}, + month = {6}, + publisher = {\textsc{pmlr}}, + shortseries = {\textsc{icml}}, + url = {https://proceedings.mlr.press/v48/xieb16.html} +} + +@article{deepbeeliefnets, + author = {Hinton, Geoffrey E. 
and Osindero, Simon and Teh, Yee-Whye}, + title = {A Fast Learning Algorithm for Deep Belief Nets}, + journal = {Neural Computation}, + volume = {18}, + number = {7}, + pages = {1527-1554}, + year = {2006}, + month = {07}, + issn = {0899-7667}, + doi = {10.1162/neco.2006.18.7.1527}, + shortseries = {\textsc{neco}}, + url = {https://direct.mit.edu/neco/article/18/7/1527/7065} +} + +@inproceedings{deepwalk, + author = {Perozzi, Bryan and Al-Rfou, Rami and Skiena, Steven}, + title = {DeepWalk: Online Learning of Social Representations}, + year = {2014}, + isbn = {9781450329569}, + publisher = {Association for Computing Machinery}, + address = {New York, \textsc{ny}, \textsc{usa}}, + url = {https://dl.acm.org/doi/pdf/10.1145/2623330.2623732}, + doi = {10.1145/2623330.2623732}, + booktitle = {Proceedings of the 20th \textsc{acm} \textsc{sigkdd} International Conference on Knowledge Discovery and Data Mining}, + pages = {701--710}, + location = {New York, \textsc{ny}, \textsc{usa}}, + shortseries = {\textsc{kdd}} +} + +@article{demorgan_syllogism3, + title = {On the Syllogism, No.~III, and on Logic in general}, + author = {Augustus De Morgan}, + journal = {Transactions of the Cambridge Philosophical Society}, + pages = {173--230}, + publisher = {University Press}, + volume = {10}, + year = {1864} +} + + +@inproceedings{dipre, + author = {Brin, Sergey}, + editor = {Atzeni, Paolo and Mendelzon, Alberto and Mecca, Giansalvatore}, + title = {Extracting Patterns and Relations from the World Wide Web}, + booktitle = {The World Wide Web and Databases}, + year = {1999}, + publisher = {Springer Berlin Heidelberg}, + address = {Berlin, Heidelberg}, + pages = {172--183}, + isbn = {978-3-540-48909-2}, + shortseries = {\textsc{w}eb\textsc{db}}, + url = {http://ilpubs.stanford.edu:8090/421/1/1999-65.pdf} +} + +@inproceedings{dirt, + author = {Lin, Dekang and Pantel, Patrick}, + title = {\textsc{dirt} -- Discovery of Inference Rules from Text}, + year = {2001}, + isbn = {158113391X}, + publisher = {Association for Computing Machinery}, + address = {New York, \textsc{ny}, \textsc{usa}}, + url = {http://www.patrickpantel.com/download/papers/2001/kdd01-1.pdf}, + doi = {10.1145/502512.502559}, + booktitle = {Proceedings of the Seventh \textsc{acm} \textsc{sigkdd} International Conference on Knowledge Discovery and Data Mining}, + pages = {323--328}, + numpages = {6}, + location = {San Francisco, California}, + shortseries = {\textsc{kdd}} +} + +@inproceedings{distant, + title = {Distant supervision for relation extraction without labeled data}, + author = {Mintz, Mike and Bills, Steven and Snow, Rion and Jurafsky, Daniel}, + booktitle = {Proceedings of the Joint Conference of the 47th Annual Meeting of the \textsc{acl} and the 4th International Joint Conference on Natural Language Processing of the \textsc{afnlp}}, + month = {8}, + year = {2009}, + address = {Suntec, Singapore}, + publisher = {Association for Computational Linguistics}, + shortseries = {\textsc{acl}}, + url = {https://aclanthology.org/P09-1113}, + pages = {1003--1011} +} + +@inproceedings{distant_early, + title = {Constructing biological knowledge bases by extracting information from text sources}, + author = {Mark Craven and Johan Kumlien}, + booktitle = {Proceedings of the Seventh International Conference on Intelligent Systems for Molecular Biology}, + volume = {1999}, + pages = {77--86}, + shortseries = {\textsc{ismb}}, + year = {1999}, + url = {https://www.aaai.org/Papers/ISMB/1999/ISMB99-010.pdf} +} + +@article{distributional_hypothesis, + 
author = {Zellig S. Harris}, + title = {Distributional Structure}, + journal = {\textsc{word}}, + volume = {10}, + number = {2--3}, + pages = {146--162}, + year = {1954}, + publisher = {Routledge}, + doi = {10.1080/00437956.1954.11659520} +} + +@inproceedings{elmo, + title = {Deep Contextualized Word Representations}, + author = {Peters, Matthew and Neumann, Mark and Iyyer, Mohit and Gardner, Matt and Clark, Christopher and Lee, Kenton and Zettlemoyer, Luke}, + booktitle = {Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)}, + shortseries = {\textsc{naacl}}, + month = {6}, + year = {2018}, + address = {New Orleans, Louisiana}, + publisher = {Association for Computational Linguistics}, + url = {https://www.aclweb.org/anthology/N18-1202}, + doi = {10.18653/v1/N18-1202}, + pages = {2227--2237} +} + +@inproceedings{epgnn, + title = {Improving Relation Classification by Entity Pair Graph}, + author = {Zhao, Yi and Wan, Huaiyu and Gao, Jianwei and Lin, Youfang}, + booktitle = {Proceedings of The Eleventh Asian Conference on Machine Learning}, + pages = {1156--1171}, + year = {2019}, + editor = {Lee, Wee Sun and Suzuki, Taiji}, + volume = {101}, + series = {Proceedings of Machine Learning Research}, + month = {11}, + pdf = {http://proceedings.mlr.press/v101/zhao19a/zhao19a.pdf}, + shortseries = {\textsc{pmlr}}, + url = {https://proceedings.mlr.press/v101/zhao19a.html} +} + +@article{fasttext, + title = {Enriching Word Vectors with Subword Information}, + author = {Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas}, + journal = {Transactions of the Association for Computational Linguistics}, + volume = {5}, + year = {2017}, + url = {https://www.aclweb.org/anthology/Q17-1010}, + doi = {10.1162/tacl_a_00051}, + pages = {135--146} +} + +@book{faustroll, + title = {Gestes et opinions du docteur Faustroll}, + author = {Alfred Jarry}, + year = {1911} +} + +@inproceedings{fewrel, + title = {{F}ew{R}el: A Large-Scale Supervised Few-Shot Relation Classification Dataset with State-of-the-Art Evaluation}, + author = {Han, Xu and Zhu, Hao and Yu, Pengfei and Wang, Ziyun and Yao, Yuan and Liu, Zhiyuan and Sun, Maosong}, + booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing}, + month = {10}, + year = {2018}, + address = {Brussels, Belgium}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/D18-1514}, + doi = {10.18653/v1/D18-1514}, + shortseries = {\textsc{emnlp}}, + pages = {4803--4809} +} + +@inproceedings{fewrel2, + title = {{F}ew{R}el 2.0: Towards More Challenging Few-Shot Relation Classification}, + author = {Gao, Tianyu and Han, Xu and Zhu, Hao and Liu, Zhiyuan and Li, Peng and Sun, Maosong and Zhou, Jie}, + booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (\textsc{emnlp}-\textsc{ijcnlp})}, + month = {11}, + year = {2019}, + address = {Hong Kong, China}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/D19-1649}, + doi = {10.18653/v1/D19-1649}, + shortseries = {\textsc{emnlp}}, + pages = {6250--6255} +} + +@inproceedings{fitb, + title = {Unsupervised Information Extraction: Regularizing Discriminative Approaches with Relation Distribution Losses}, + author = {Étienne Simon and Vincent Guigue and Benjamin 
Piwowarski}, + booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics}, + month = {7}, + year = {2019}, + location = {Florence, Italy}, + publisher = {Association for Computational Linguistics}, + url = {https://www.aclweb.org/anthology/P19-1133}, + doi = {10.18653/v1/P19-1133}, + shortseries = {\textsc{acl}}, + pages = {1378--1387} +} + +@inproceedings{freebase, + address = {New York, \textsc{ny}, \textsc{usa}}, + author = {Bollacker, Kurt and Evans, Colin and Paritosh, Praveen and Sturge, Tim and Taylor, Jamie}, + booktitle = {\textsc{sigmod} '08: Proceedings of the 2008 \textsc{acm} \textsc{sigmod} international conference on Management of data}, + doi = {10.1145/1376616.1376746}, + isbn = {978-1-60558-102-6}, + location = {Vancouver, Canada}, + pages = {1247--1250}, + publisher = {Association for Computing Machinery}, + title = {Freebase: a collaboratively created graph database for structuring human knowledge}, + url = {https://dl.acm.org/doi/pdf/10.1145/1376616.1376746}, + shortseries = {\textsc{sigmod}}, + year = {2008} +} + +@dataset{freebase_data, + title = {Freebase Data Dumps}, + author = {Google}, + url = {https://developers.google.com/freebase/data}, + year = {2016} +} + +@misc{freebase_processing, + title = {Freebase-triples: A Methodology for Processing the Freebase Data Dumps}, + author = {Niel Chah}, + year = {2017}, + eprint = {1712.08707}, + archivePrefix = {arXiv}, + primaryClass = {cs.DB} +} + +@inproceedings{gcn_spectral_early, + author={Joan Bruna and Wojciech Zaremba and Arthur D. Szlam and Yann LeCun}, + title={Spectral Networks and Locally Connected Networks on Graphs}, + editor = {Yoshua Bengio and Yann LeCun}, + booktitle = {2nd International Conference on Learning Representations, \textsc{iclr} 2014, Banff, \textsc{ab}, Canada, April 14-16, 2014, Conference Track Proceedings}, + year = {2014}, + shortseries = {\textsc{iclr}}, + url = {http://arxiv.org/abs/1312.6203} +} + +@inproceedings{gcn_spectral_semi, + title = {Semi-Supervised Classification with Graph Convolutional Networks}, + author = {Kipf, Thomas N and Welling, Max}, + booktitle = {International Conference on Learning Representations}, + year = {2017}, + shortseries = {\textsc{iclr}}, + url = {https://openreview.net/forum?id=SJU4ayYgl} +} + +@article{geneontology, + author = {{Gene Ontology Consortium}}, + title = {The Gene Ontology (\textsc{go}) database and informatics resource}, + journal = {Nucleic Acids Research}, + volume = {32}, + pages = {D258-D261}, + year = {2004}, + month = {01}, + issn = {0305-1048}, + doi = {10.1093/nar/gkh036}, + url = {https://academic.oup.com/nar/article-pdf/32/suppl\_1/D258/7621365/gkh036.pdf} +} + +@article{georgetown-ibm, + title = {The Georgetown--\textsc{ibm} experiment}, + author = {Dostert, Leon E}, + journal = {Machine translation of languages}, + publisher = {John Wiley \& Sons}, + location = {New York}, + pages = {124--135}, + year = {1955} +} + +@article{gi_counting, + title = {A note on the graph isomorphism counting problem}, + author = {Mathon, Rudolf}, + journal = {Information Processing Letters}, + volume = {8}, + number = {3}, + pages = {131--136}, + year = {1979}, + publisher = {Elsevier} +} + +@misc{gi_quasipoly, + title={Graph Isomorphism in Quasipolynomial Time}, + author={László Babai}, + year={2015}, + eprint={1512.03547}, + archivePrefix={arXiv}, + primaryClass={cs.DS} +} + +@article{gicomplete, + title={Graph isomorphism problem}, + author={Zemlyachenko, Viktor N and Korneenko, Nickolay M and 
Tyshkevich, Regina I}, + journal={Journal of Soviet Mathematics}, + volume={29}, + number={4}, + pages={1426--1481}, + year={1985}, + publisher={Springer} +} + +@article{gis, + issn = {00034851}, + url = {http://www.jstor.org/stable/2240069}, + author = {John Newton Darroch and D. Ratcliff}, + journal = {The Annals of Mathematical Statistics}, + number = {5}, + pages = {1470--1480}, + publisher = {Institute of Mathematical Statistics}, + title = {Generalized Iterative Scaling for Log-Linear Models}, + volume = {43}, + year = {1972} +} + +@inproceedings{glove, + title = {{G}lo{V}e: Global Vectors for Word Representation}, + author = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher}, + booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (\textsc{emnlp})}, + month = {10}, + year = {2014}, + address = {Doha, Qatar}, + publisher = {Association for Computational Linguistics}, + url = {https://www.aclweb.org/anthology/D14-1162}, + doi = {10.3115/v1/D14-1162}, + pages = {1532--1543} +} + +@article{gnn_early, + author = {Sperduti, A. and Starita, A.}, + journal = {\textsc{ieee} Transactions on Neural Networks}, + title = {Supervised neural networks for the classification of structures}, + year = {1997}, + volume = {8}, + number = {3}, + pages = {714-735}, + doi = {10.1109/72.572108} +} + +@inproceedings{gnn_re, + title = {Graph Neural Networks with Generated Parameters for Relation Extraction}, + author = {Zhu, Hao and Lin, Yankai and Liu, Zhiyuan and Fu, Jie and Chua, Tat-Seng and Sun, Maosong}, + booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics}, + month = {7}, + year = {2019}, + address = {Florence, Italy}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P19-1128}, + doi = {10.18653/v1/P19-1128}, + pages = {1331--1339}, +} + +@inproceedings{gp-gnn, + title = {Graph Neural Networks with Generated Parameters for Relation Extraction}, + author = {Zhu, Hao and Lin, Yankai and Liu, Zhiyuan and Fu, Jie and Chua, Tat-Seng and Sun, Maosong}, + booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics}, + month = {7}, + year = {2019}, + address = {Florence, Italy}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P19-1128}, + doi = {10.18653/v1/P19-1128}, + pages = {1331--1339}, + shortseries = {\textsc{acl}} +} + +@misc{gpt, + title = {Improving Language Understanding by Generative Pre-Training}, + author = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya}, + year = {2018} +} + +@inproceedings{graph_attention_network, + title={Graph Attention Networks}, + author={Veličković, Petar and Cucurull, Guillem and Casanova, Arantxa and Romero, Adriana and Liò, Pietro and Bengio, Yoshua}, + booktitle={International Conference on Learning Representations}, + year={2018}, + shortseries = {\textsc{iclr}}, + url={https://openreview.net/forum?id=rJXMpikCZ}, +} + +@inproceedings{graph_continuous_hashing, + author = {Morris, Christopher and Kriege, Nils M.
and Kersting, Kristian and Mutzel, Petra}, + booktitle = {2016 \textsc{ieee} 16th International Conference on Data Mining (\textsc{icdm})}, + title = {Faster Kernels for Graphs with Continuous Attributes via Hashing}, + year = {2016}, + pages = {1095--1100}, + organization = {\textsc{ieee}}, + doi = {10.1109/ICDM.2016.0142} +} + +@article{graph_fourier, + author = {Shuman, David I and Narang, Sunil K. and Frossard, Pascal and Ortega, Antonio and Vandergheynst, Pierre}, + journal = {\textsc{ieee} Signal Processing Magazine}, + title = {The emerging field of signal processing on graphs: Extending high-dimensional data analysis to networks and other irregular domains}, + year = {2013}, + volume = {30}, + number = {3}, + pages = {83-98}, + doi = {10.1109/MSP.2012.2235192}, + url = {https://arxiv.org/pdf/1211.0053.pdf} +} + +@inproceedings{graph_re_fs, + title = {Few-shot Relation Extraction via Bayesian Meta-learning on Relation Graphs}, + author = {Meng Qu and Tianyu Gao and Louis-Pascal Xhonneux and Jian Tang}, + booktitle = {Proceedings of the 37th International Conference on Machine Learning}, + url = {https://arxiv.org/pdf/2007.02387.pdf}, + shortseries = {\textsc{icml}}, + year = {2020} +} + +@inproceedings{graphie, + title = {Graph\textsc{ie}: A Graph-Based Framework for Information Extraction}, + author = {Qian, Yujie and Santus, Enrico and Jin, Zhijing and Guo, Jiang and Barzilay, Regina}, + booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)}, + month = {6}, + year = {2019}, + address = {Minneapolis, Minnesota}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/N19-1082}, + doi = {10.18653/v1/N19-1082}, + pages = {751--761} +} + +@inproceedings{graphrel, + title = {{G}raph{R}el: Modeling Text as Relational Graphs for Joint Entity and Relation Extraction}, + author = {Fu, Tsu-Jui and Li, Peng-Hsuan and Ma, Wei-Yun}, + booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics}, + month = {7}, + year = {2019}, + address = {Florence, Italy}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P19-1136}, + doi = {10.18653/v1/P19-1136}, + pages = {1409--1418} +} + +@inproceedings{graphsage, + author = {Hamilton, Will and Ying, Zhitao and Leskovec, Jure}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett}, + publisher = {Curran Associates, Inc.}, + title = {Inductive Representation Learning on Large Graphs}, + url = {https://proceedings.neurips.cc/paper/2017/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf}, + shortseries = {\textsc{n}eur\textsc{ips}}, + volume = {30}, + year = {2017} +} + +@book{gumbel_max, + title = {Statistical Theory of Extreme Values and Some Practical Applications. 
A Series of Lectures.}, + author = {Gumbel, Emil Julius}, + year = {1954}, + publisher = {\textsc{us} Government Printing Office}, + url = {https://ntrl.ntis.gov/NTRL/dashboard/searchResults/titleDetail/PB175818.xhtml} +} + +@inproceedings{gumbel_softmax, + title = {Categorical reparameterization with gumbel--softmax}, + author = {Jang, Eric and Gu, Shixiang and Poole, Ben}, + booktitle = {International Conference on Learning Representations}, + year = {2016}, + shortseries = {\textsc{iclr}}, + url = {https://openreview.net/forum?id=rkE3y85ee} +} + +@book{hansen_mass_noun_hypothesis, + title = {Language and logic in ancient China}, + author = {Chad D. Hansen}, + year = {1983}, + publisher = {University of Michigan Press} +} + +@inproceedings{hasegawa, + title = {Discovering Relations among Named Entities from Large Corpora}, + author = {Hasegawa, Takaaki and Sekine, Satoshi and Grishman, Ralph}, + booktitle = {Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics (\textsc{acl}-04)}, + month = {7}, + year = {2004}, + address = {Barcelona, Spain}, + url = {https://aclanthology.org/P04-1053}, + doi = {10.3115/1218955.1219008}, + shortseries = {\textsc{acl}}, + pages = {415--422} +} + +@inproceedings{hearst_hyponyms, + title = {Automatic Acquisition of Hyponyms from Large Text Corpora}, + author = {Hearst, Marti A.}, + booktitle = {\textsc{coling} 1992 Volume 2: The 14th International Conference on Computational Linguistics}, + year = {1992}, + shortseries = {\textsc{coling}}, + url = {https://aclanthology.org/C92-2082} +} + +@inproceedings{heterogeneous_attention, + author = {Wang, Xiao and Ji, Houye and Shi, Chuan and Wang, Bai and Ye, Yanfang and Cui, Peng and Yu, Philip S}, + title = {Heterogeneous Graph Attention Network}, + year = {2019}, + isbn = {9781450366748}, + publisher = {Association for Computing Machinery}, + address = {New York, \textsc{ny}, \textsc{usa}}, + doi = {10.1145/3308558.3313562}, + booktitle = {The World Wide Web Conference}, + pages = {2022--2032}, + keywords = {Graph Analysis, Social Network, Neural Network}, + location = {San Francisco, \textsc{ca}, \textsc{usa}}, + shortseries = {\textsc{www}}, + doi = {10.1145/3308558.3313562}, + url = {https://dl.acm.org/doi/pdf/10.1145/3308558.3313562} +} + +@inbook{heterogeneous_transformer, + author = {Hu, Ziniu and Dong, Yuxiao and Wang, Kuansan and Sun, Yizhou}, + title = {Heterogeneous Graph Transformer}, + year = {2020}, + isbn = {9781450370233}, + publisher = {Association for Computing Machinery}, + address = {New York, \textsc{ny}, \textsc{usa}}, + doi = {10.1145/3366423.3380027}, + booktitle = {Proceedings of The Web Conference 2020}, + pages = {2704--2710}, + shortseries = {\textsc{www}}, + doi = {10.1145/3366423.3380027}, + url = {https://dl.acm.org/doi/pdf/10.1145/3366423.3380027} +} + +@inproceedings{ie2, + title = {\textsc{sra}: Description of the \textsc{ie}\textsuperscript{2} System Used for \textsc{muc-7}}, + author = {Chinatsu Aone and Lauren Halverson and Tom Hampton and Mila Ramos-Santacruz}, + booktitle = {Seventh Message Understanding Conference (\textsc{muc-7}): Proceedings of a Conference Held in Fairfax, Virginia, {A}pril 29 -- May 1, 1998}, + year = {1998}, + shortseries = {\textsc{muc}}, + url = {https://aclanthology.org/M98-1012} +} + +@article{is-a_analysis, + author = {Ronald Brachman}, + journal = {Computer}, + title = {What \textsc{is-a} Is and Isn't: An Analysis of Taxonomic Links in Semantic Networks}, + year = {1983}, + volume = {16}, + number = {10}, + issn 
= {1558-0814}, + pages = {30--36}, + doi = {10.1109/MC.1983.1654194}, + publisher = {\textsc{ieee} Computer Society}, + address = {Los Alamitos, \textsc{ca}, \textsc{usa}}, + shortseries = {Computer}, + url = {https://doi.ieeecomputersociety.org/10.1109/MC.1983.1654194}, + month = {10} +} + +@inproceedings{kb_document_retrieval, + author = {Dalton, Jeffrey and Dietz, Laura and Allan, James}, + title = {Entity Query Feature Expansion Using Knowledge Base Links}, + booktitle = {Proceedings of the 37th International \textsc{acm} \textsc{sigir} Conference on Research \& Development in Information Retrieval}, + series = {\textsc{sigir} '14}, + year = {2014}, + isbn = {978-1-4503-2257-7}, + location = {Gold Coast, Queensland, Australia}, + pages = {365--374}, + url = {http://doi.acm.org/10.1145/2600428.2609628}, + doi = {10.1145/2600428.2609628}, + publisher = {\textsc{acm}}, + address = {New York, \textsc{ny}, \textsc{usa}}, +} + +@inproceedings{kb_qa1, + title = {Semantic Parsing on {F}reebase from Question-Answer Pairs}, + author = {Berant, Jonathan and Chou, Andrew and Frostig, Roy and Liang, Percy}, + booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing}, + month = {10}, + year = {2013}, + address = {Seattle, Washington, USA}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/D13-1160}, + pages = {1533--1544}, +} + +@inproceedings{kb_qa2, + author={Yih, Wen-tau and Chang, Ming-Wei and He, Xiaodong and Gao, Jianfeng}, + title={Semantic Parsing via Staged Query Graph Generation: Question Answering with Knowledge Base}, + booktitle={Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)}, + year={2015}, + publisher={Association for Computational Linguistics}, + pages={1321--1331}, + location={Beijing, China}, + doi={10.3115/v1/P15-1128}, + url={http://aclweb.org/anthology/P15-1128} +} + +@inproceedings{kernel_dependency, + title = {Dependency Tree Kernels for Relation Extraction}, + author = {Aron Culotta and Jeffrey Sorensen}, + booktitle = {Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics}, + month = {7}, + year = {2004}, + address = {Barcelona, Spain}, + url = {https://aclanthology.org/P04-1054}, + doi = {10.3115/1218955.1219009}, + shortseries = {\textsc{acl}}, + pages = {423--429} +} + +@inproceedings{kernel_exploring, + title = {Exploring Various Knowledge in Relation Extraction}, + author = {GuoDong Zhou and Jian Su and Jie Zhang and Min Zhang}, + booktitle = {Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics}, + month = {6}, + year = {2005}, + address = {Ann Arbor, Michigan}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P05-1053}, + doi = {10.3115/1219840.1219893}, + shortseries = {\textsc{acl}}, + pages = {427--434} +} + +@article{kernel_parse, + author = {Dmitry Zelenko and Chinatsu Aone and Anthony Richardella}, + title = {Kernel Methods for Relation Extraction}, + year = {2003}, + volume = {3}, + issn = {1532-4435}, + journal = {The Journal of Machine Learning Research}, + month = {3}, + shortseries = {\textsc{jmlr}}, + pages = {1083--1106}, + url = {https://www.jmlr.org/papers/volume3/zelenko03a/zelenko03a.pdf} +} + +@inproceedings{knowitall, + title = {Web-scale information extraction in knowitall: (preliminary results)}, + author = {Oren 
Etzioni and Michael Cafarella and Doug Downey and Stanley Kok and Ana-Maria Popescu and Tal Shaked and Stephen Soderland and Daniel S Weld and Alexander Yates}, + booktitle = {Proceedings of the 13th international conference on World Wide Web}, + pages = {100--110}, + doi = {10.1145/988672.988687}, + shortseries = {\textsc{www}}, + year = {2004} +} + +@inproceedings{knowledgenet, + title = {{K}nowledge{N}et: A Benchmark Dataset for Knowledge Base Population}, + author = {Mesquita, Filipe and Cannaviccio, Matteo and Schmidek, Jordan and Mirza, Paramita and Barbosa, Denilson}, + booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (\textsc{emnlp}-\textsc{ijcnlp})}, + month = {11}, + year = {2019}, + address = {Hong Kong, China}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/D19-1069}, + doi = {10.18653/v1/D19-1069}, + pages = {749--758} +} + +@article{label_propagation, + title = {Learning from labeled and unlabeled data with label propagation}, + author = {Xiaojin Zhu and Zoubin Ghahramani}, + year = {2002}, + journal = {Technical Report \textsc{cmu}-\textsc{cald}}, + url = {https://mlg.eng.cam.ac.uk/zoubin/papers/CMU-CALD-02-107.pdf} +} + +@inproceedings{label_propagation_re, + title = {Relation Extraction Using Label Propagation Based Semi-Supervised Learning}, + author = {Jinxiu Chen and Donghong Ji and Chew Lim Tan and Zhengyu Niu}, + booktitle = {Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the Association for Computational Linguistics}, + month = {7}, + year = {2006}, + address = {Sydney, Australia}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P06-1017}, + doi = {10.3115/1220175.1220192}, + shortseries = {\textsc{acl}}, + pages = {129--136} +} + +@misc{layernorm, + title = {Layer Normalization}, + author = {Ba, Jimmy Lei and Kiros, Jamie Ryan and Hinton, Geoffrey}, + year = {2016}, + eprint = {1607.06450}, + archivePrefix = {arXiv}, + primaryClass = {stat.ML} +} + +@article{lda, + title = {Latent {D}irichlet allocation}, + author = {David M Blei and Andrew Y Ng and Michael Jordan}, + journal = {The Journal of Machine Learning Research}, + volume = {3}, + pages = {993--1022}, + year = {2003}, + url = {https://www.jmlr.org/papers/volume3/blei03a/blei03a.pdf} +} + +@inproceedings{leakyrelu, + title = {Rectifier nonlinearities improve neural network acoustic models}, + author = {Maas, Andrew L and Hannun, Awni Y and Ng, Andrew Y and others}, + booktitle = {Proceedings of the 30th International Conference on Machine Learning (\textsc{icml}-13)}, + volume = {30}, + number = {1}, + pages = {3}, + year = {2013}, + url = {https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf} +} + +@misc{leman_spelling, + title={Graph Isomorphism in Quasipolynomial Time}, + author={László Babai}, + year={2016}, + eprint={1512.03547}, + archivePrefix={arXiv}, + primaryClass={cs.DS} +} + +@book{linguistique_generale, + title = {Cours de linguistique générale}, + author = {de Saussure, Ferdinand}, + year = {1916}, + editor = {Bally, Charles and Seche\-haye, Albert}, + language = {French}, + publisher = {Payot} +} + +@misc{lm_limits, + title = {Exploring the Limits of Language Modeling}, + author = {Jozefowicz, Rafal and Vinyals, Oriol and Schuster, Mike and Shazeer, Noam and Wu, Yonghui}, + year = {2016}, + eprint = {1602.02410}, 
+ archivePrefix = {arXiv}, + primaryClass = {cs.CL} +} + +@inproceedings{lm_rp, + title = {Distilling Relation Embeddings from Pretrained Language Models}, + author = {Ushio, Asahi and Camacho-Collados, Jose and Schockaert, Steven}, + booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, + month = {11}, + year = {2021}, + address = {Online and Punta Cana, Dominican Republic}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/2021.emnlp-main.712}, + doi = {10.18653/v1/2021.emnlp-main.712}, + pages = {9044--9062} +} + +@inproceedings{lsa, + title = {Using latent semantic analysis to improve access to textual information}, + author = {Dumais, Susan T and Furnas, George W and Landauer, Thomas K and Deerwester, Scott and Harshman, Richard}, + booktitle = {Proceedings of the \textsc{sigchi} conference on Human factors in computing systems}, + pages = {281--285}, + shortseries = {\textsc{sigchi}}, + year = {1988}, + doi = {10.1145/57167.57214}, + url = {https://dl.acm.org/doi/pdf/10.1145/57167.57214}, +} + +@article{lstm, + author = {Hochreiter, Sepp and Schmidhuber, Jürgen}, + title = {Long Short-Term Memory}, + journal = {Neural Computation}, + volume = {9}, + number = {8}, + pages = {1735--1780}, + year = {1997}, + month = {11}, + issn = {0899-7667}, + doi = {10.1162/neco.1997.9.8.1735}, + shortseries = {\textsc{neco}}, + url = {https://direct.mit.edu/neco/article/9/8/1735/6109} +} + +@article{lstm_odyssey, + author = {Greff, Klaus and Srivastava, Rupesh K. and Koutník, Jan and Steunebrink, Bas R. and Schmidhuber, Jürgen}, + journal = {\textsc{ieee} Transactions on Neural Networks and Learning Systems}, + publisher = {Institute of Electrical and Electronics Engineers}, + title = {\textsc{lstm}: A Search Space Odyssey}, + year = {2017}, + volume = {28}, + number = {10}, + pages = {2222--2232}, + doi = {10.1109/TNNLS.2016.2582924} +} + +@article{lstm_vanishing, + author = {Hochreiter, Sepp}, + year = {1998}, + month = {04}, + pages = {107--116}, + title = {The Vanishing Gradient Problem During Learning Recurrent Neural Nets and Problem Solutions}, + volume = {6}, + journal = {International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems}, + doi = {10.1142/S0218488598000094} +} + +@article{marginalia_sino-logica, + title={Marginalia sino-logica}, + author={Christoph Harbsmeier}, + journal={Understanding the Chinese mind}, + pages={125--166}, + year={1989}, + publisher={Oxford University Press} +} + +@inproceedings{maximum_entropy_re, + title = {Combining Lexical, Syntactic, and Semantic Features with Maximum Entropy Models for Information Extraction}, + author = {Nanda Kambhatla}, + booktitle = {Proceedings of the \textsc{acl} Interactive Poster and Demonstration Sessions}, + month = {7}, + year = {2004}, + address = {Barcelona, Spain}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P04-3022}, + shortseries = {\textsc{acl}}, + pages = {178--181} +} + +@inproceedings{maxpool, + title = {A neural network for speaker-independent isolated word recognition}, + author = {Yamaguchi, Kouichi and Sakamoto, Kenji and Akabane, Toshio}, + booktitle = {First International Conference on Spoken Language Processing}, + month = {11}, + year = {1990}, + address = {Kobe, Japan}, + url = {https://www.isca-speech.org/archive/icslp_1990/i90_1077.html}, + pages = {1077--1080} +} + +@article{meaning_skepticism, + title = {Skepticism about Meaning: Indeterminacy, 
Normativity, and the Rule-Following Paradox}, + volume = {23}, + doi = {10.1080/00455091.1997.10715967}, + journal = {Canadian Journal of Philosophy Supplementary Volume}, + publisher = {Cambridge University Press}, + author = {Soames, Scott}, + year = {1997}, + pages = {211--249} +} + +@inproceedings{memory_networks, + author = {Weston, Jason and Chopra, Sumit and Bordes, Antoine}, + editor = {Bengio, Yoshua and LeCun, Yann}, + title = {Memory Networks}, + booktitle = {3rd International Conference on Learning Representations (\textsc{iclr}), Conference Track Proceedings}, + eventdate = {2015-05-07/2015-05-09}, + location = {San Diego, \textsc{ca}, \textsc{usa}}, + shortseries = {\textsc{iclr}}, + year = {2015}, + url = {http://arxiv.org/abs/1410.3916} +} + +@inproceedings{memory_networks_end-to-end, + author = {Sukhbaatar, Sainbayar and Szlam, Arthur and Weston, Jason and Fergus, Rob}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {C. Cortes and N. Lawrence and D. Lee and M. Sugiyama and R. Garnett}, + publisher = {Curran Associates, Inc.}, + title = {End-To-End Memory Networks}, + shortseries = {\textsc{n}eur\textsc{ips}}, + url = {https://proceedings.neurips.cc/paper/2015/file/8fb21ee7a2207526da55a679f0332de2-Paper.pdf}, + volume = {28}, + year = {2015} +} + +@inproceedings{miml, + title = {Multi-instance Multi-label Learning for Relation Extraction}, + author = {Mihai Surdeanu and Julie Tibshirani and Ramesh Nallapati and Christopher Manning}, + booktitle = {Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning}, + month = {7}, + year = {2012}, + address = {Jeju Island, Korea}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/D12-1042}, + shortseries = {\textsc{emnlp}}, + pages = {455--465} +} + +@inproceedings{mmsrl, + title = {Fine-tuning and Sampling Strategies for Multimodal Role Labeling of Entities under Class Imbalance}, + author = {Montariol, Syrielle and Simon, Étienne and Riabi, Arij and Seddah, Djam{\'e}}, + author+an = {1=equalcontribution;2=equalcontribution}, + booktitle = {Proceedings of the Workshop on Combating Online Hostile Posts in Regional Languages during Emergency Situations}, + month = {5}, + year = {2022}, + address = {Dublin, Ireland}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/2022.constraint-1.7}, + pages = {55--65}, + shortseries = {\textsc{constraint}} +} + +@book{molloy, + title = {Molloy}, + author = {Samuel Beckett}, + year = {1955} +} + +@inproceedings{mtb, + title = {Matching the Blanks: Distributional Similarity for Relation Learning}, + author = {Livio Baldini Soares and Nicholas FitzGerald and Jeffrey Ling and Tom Kwiatkowski}, + booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics}, + month = {7}, + year = {2019}, + address = {Florence, Italy}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P19-1279}, + doi = {10.18653/v1/P19-1279}, + shortseries = {\textsc{acl}}, + pages = {2895--2905} +} + +@inproceedings{mtb_low, + title = {Few-shot Relation Extraction via {B}ayesian Meta-learning on Relation Graphs}, + author = {Qu, Meng and Gao, Tianyu and Xhonneux, Louis-Pascal and Tang, Jian}, + booktitle = {Proceedings of the 37th International Conference on Machine Learning}, + pages = {7867--7876}, + year = {2020}, + editor = {Hal Daumé III and Aarti 
Singh}, + volume = {119}, + series = {Proceedings of Machine Learning Research}, + month = {7}, + publisher = {\textsc{pmlr}}, + url = {https://proceedings.mlr.press/v119/qu20a.html} +} + +@inproceedings{muc7, + title = {Overview of \textsc{muc}-7}, + author = {Chinchor, Nancy A.}, + booktitle = {Seventh Message Understanding Conference (\textsc{muc}-7): Proceedings of a Conference Held in Fairfax, Virginia, {A}pril 29 - May 1, 1998}, + year = {1998}, + shortseries = {\textsc{muc}}, + url = {https://aclanthology.org/M98-1001} +} + +@article{multi-instance, + author = {Thomas G. Dietterich and Richard H. Lathrop and Tomás Lozano-Pérez}, + title = {Solving the multiple instance problem with axis-parallel rectangles}, + journal = {Artificial Intelligence}, + volume = {89}, + number = {1}, + pages = {31--71}, + year = {1997}, + issn = {0004-3702}, + doi = {https://doi.org/10.1016/S0004-3702(96)00034-3}, + url = {https://www.sciencedirect.com/science/article/pii/S0004370296000343} +} + +@inproceedings{multir, + title = {Knowledge-Based Weak Supervision for Information Extraction of Overlapping Relations}, + author = {Raphael Hoffmann and Congle Zhang and Xiao Ling and Luke Zettlemoyer and Daniel Weld}, + booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies}, + month = {6}, + year = {2011}, + address = {Portland, Oregon, \textsc{usa}}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P11-1055}, + shortseries = {\textsc{acl}}, + pages = {541--550}, +} + +@inproceedings{n-ary_old, + title = {Simple Algorithms for Complex Relation Extraction with Applications to Biomedical \textsc{ie}}, + author = {McDonald, Ryan and Pereira, Fernando and Kulick, Seth and Winters, Scott and Jin, Yang and White, Pete}, + booktitle = {Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics (\textsc{acl}{'}05)}, + month = {6}, + year = {2005}, + address = {Ann Arbor, Michigan}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P05-1061}, + doi = {10.3115/1219840.1219901}, + pages = {491--498} +} + +@inproceedings{n-ary_recent, + title = {N-ary Relation Extraction using Graph-State \textsc{lstm}}, + author = {Song, Linfeng and Zhang, Yue and Wang, Zhiguo and Gildea, Daniel}, + booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing}, + month = {10}, + year = {2018}, + address = {Brussels, Belgium}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/D18-1246}, + doi = {10.18653/v1/D18-1246}, + pages = {2226--2235}, +} + +@book{nahuatl, + title={The mesoamerican indian languages}, + author={Suárez, Jorge A}, + year={1983}, + publisher={Cambridge University Press} +} + +@inproceedings{nce, + title = {Noise-\wordboundary contrastive estimation: A new estimation principle for unnormalized statistical models}, + author = {Gutmann, Michael and Hyvärinen, Aapo}, + booktitle = {Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics}, + pages = {297--304}, + year = {2010}, + eventdate = {2010-05-13/2010-05-15}, + organization = {\textsc{jmlr} Workshop and Conference Proceedings}, + editor = {Teh, Yee Whye and Titterington, Mike}, + volume = {9}, + series = {Proceedings of Machine Learning Research}, + shortseries = {\textsc{aistats}}, + location = {Chia Laguna Resort, Sardinia, Italy}, + url = 
{http://proceedings.mlr.press/v9/gutmann10a.html} +} + +@inproceedings{nmt_encdec, + title = {Learning Phrase Representations using \textsc{rnn} Encoder{--}Decoder for Statistical Machine Translation}, + author = {Cho, Kyunghyun and van Merriënboer, Bart and Gulçehre, Çağlar and Bahdanau, Dzmitry and Bougares, Fethi and Sch\-wenk, Holger and Bengio, Yoshua}, + booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (\textsc{emnlp})}, + month = {10}, + year = {2014}, + address = {Doha, Qatar}, + publisher = {Association for Computational Linguistics}, + url = {https://www.aclweb.org/anthology/D14-1179}, + doi = {10.3115/v1/D14-1179}, + pages = {1724--1734} +} + +@online{norvig_chomsky, + author = {Peter Norvig}, + title = {On Chomsky and the Two Cultures of Statistical Learning}, + year = {2011}, + url = {https://norvig.com/chomsky.html} +} + +@article{nplm, + author = {Bengio, Yoshua and Ducharme, Réjean and Vincent, Pascal and Janvin, Christian}, + title = {A Neural Probabilistic Language Model}, + year = {2003}, + month = {3}, + shortseries = {\textsc{jmlr}}, + volume = {3}, + journal = {The Journal of Machine Learning Research}, + pages = {1137--1155}, + url = {https://www.jmlr.org/papers/volume3/tmp/bengio03a.pdf} +} + +@inproceedings{nplm_nce, + title = {A fast and simple algorithm for training neural probabilistic language models}, + author = {Mnih, Andriy and Teh, Yee Whye}, + booktitle = {Proceedings of the 29th International Conference on Machine Learning}, + url = {http://icml.cc/2012/papers/855.pdf}, + year = {2012}, + pages = {58} +} + +@inproceedings{ntn, + author = {Socher, Richard and Chen, Danqi and Manning, Christopher D and Ng, Andrew}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {C.J. Burges and L. Bottou and M. Welling and Z. Ghahramani and K.Q. 
Weinberger}, + publisher = {Curran Associates, Inc.}, + title = {Reasoning With Neural Tensor Networks for Knowledge Base Completion}, + url = {https://proceedings.neurips.cc/paper/2013/file/b337e84de8752b27eda3a12363109e80-Paper.pdf}, + volume = {26}, + year = {2013} +} + +@dataset{nyt, + title = {The New York Times Annotated Corpus}, + author = {Evan Sandhaus}, + number = {LDC2008T19}, + url = {https://catalog.ldc.upenn.edu/LDC2008T19}, + year = {2008}, + publisher = {Linguistic Data Consortium}, + location = {Philadelphia}, + doi = {10.35111/77ba-9x74}, + isbn = {1-58563-486-7}, + shortseries = {\textsc{ldc}}, +} + +@inproceedings{oie, + author = {Michele Banko and Michael Cafarella and Stephen Soderland and Matt Broadhead and Oren Etzioni}, + title = {Open Information Extraction from the Web}, + year = {2007}, + publisher = {Morgan Kaufmann Publishers Inc.}, + address = {San Francisco, \textsc{ca}, \textsc{usa}}, + booktitle = {Proceedings of the 20th International Joint Conference on Artifical Intelligence}, + shortseries = {\textsc{ijcai}}, + pages = {2670--2676}, + url = {https://www.aaai.org/Papers/IJCAI/2007/IJCAI07-429.pdf}, + location = {Hyderabad, India}, +} + +@misc{oucuipo, + author = {Gil Chevalier}, + title = {Frontispice de la Bibliothèque Oucuipienne}, + date = {1990~}, + publisher = {Plein chant} +} + +@article{over_grammar, + title = {Reconsidering prepositional polysemy networks: The case of over}, + author = {Tyler, Andrea and Evans, Vyvyan}, + journal = {Language}, + pages = {724--765}, + year = {2001}, +} + +@inbook{paris_quadrifolia, + title = {Paris Quadrifolia}, + booktitle = {Les Liliacées}, + author = {Pierre-Joseph Redouté}, + year = {1802}, + addendum = {Via Wikimedia Commons}, + url = {https://commons.wikimedia.org/wiki/File:Paris_quadrifolia_in_Les_liliacees.jpg} +} + +@article{path_counting_estimation, + title = {Estimating the Number of $s$--$t$ Paths in a Graph.}, + author = {Roberts, Ben and Kroese, Dirk P}, + journal = {Journal of Graph Algorithms and Applications}, + volume = {11}, + number = {1}, + pages = {195--214}, + year = {2007} +} + +@article{path_counting_sharp_p, + author = {Valiant, Leslie G.}, + title = {The Complexity of Enumeration and Reliability Problems}, + journal = {\textsc{siam} Journal on Computing}, + volume = {8}, + number = {3}, + pages = {410-421}, + year = {1979}, + doi = {10.1137/0208032}, +} + +@inproceedings{pcfg, + title = {Accurate Unlexicalized Parsing}, + author = {Dan Klein and Christopher Manning}, + booktitle = {Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics}, + month = {7}, + year = {2003}, + address = {Sapporo, Japan}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P03-1054}, + doi = {10.3115/1075096.1075150}, + pages = {423--430} +} + +@inproceedings{pcnn, + title = {Distant Supervision for Relation Extraction via Piecewise Convolutional Neural Networks}, + author = {Daojian Zeng and Kang Liu and Yubo Chen and Jun Zhao}, + booktitle = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing}, + month = {9}, + year = {2015}, + address = {Lisbon, Portugal}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/D15-1203}, + doi = {10.18653/v1/D15-1203}, + shortseries = {\textsc{emnlp}}, + pages = {1753--1762} +} + +@inproceedings{pcnn_attention, + title = {Neural Relation Extraction with Selective Attention over Instances}, + author = {Yankai Lin and Shiqi Shen 
and Zhiyuan Liu and Huanbo Luan and Maosong Sun}, + booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + month = {8}, + year = {2016}, + address = {Berlin, Germany}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P16-1200}, + doi = {10.18653/v1/P16-1200}, + shortseries = {\textsc{acl}}, + pages = {2124--2133} +} + +@inproceedings{planetoid, + title = {Revisiting Semi-Super\-vised Learning with Graph Embeddings}, + author = {Yang, Zhilin and Cohen, William and Salakhudinov, Ruslan}, + booktitle = {Proceedings of The 33rd International Conference on Machine Learning}, + pages = {40--48}, + year = {2016}, + editor = {Balcan, Maria Florina and Weinberger, Kilian Q.}, + volume = {48}, + series = {Proceedings of Machine Learning Research}, + address = {New York, \textsc{ny}, \textsc{usa}}, + month = {6}, + publisher = {\textsc{pmlr}}, + url = {https://proceedings.mlr.press/v48/yanga16.html}, + shortseries = {\textsc{icml}} +} + +@article{prepositions_francais, + title = {La couleur des prépositions à et de}, + author = {Marque-Pucheu, Christiane}, + pages = {74--105}, + journal = {Langue française}, + year = {2008}, + volume = {157}, + url = {https://www.cairn.info/load_pdf.php?ID_ARTICLE=LF_157_0074}, + publisher = {Armand Colin}, + address = {Paris, France}, + doi = {10.3917/lf.157.0074} +} + +@book{probabilistic_reasoning, + title = {Probabilistic reasoning in intelligent systems: networks of plausible inference}, + author = {Pearl, Judea}, + year = {1988}, + doi = {10.5555/52121}, + isbn = {978-0-934613-73-6}, + publisher = {Morgan Kaufmann} +} + +@inproceedings{prototypical_re, + title = {Prototypical Representation Learning for Relation Extraction}, + author = {Ning Ding and Xiaobin Wang and Yao Fu and Guangwei Xu and Rui Wang and Pengjun Xie and Ying Shen and Fei Huang and Hai-Tao Zheng and Rui Zhang}, + booktitle = {International Conference on Learning Representations}, + year = {2021}, + shortseries = {\textsc{iclr}}, + url = {https://openreview.net/forum?id=aCgLmfhIy_f} +} + +@article{quine_two_dogma, + title = {Main Trends in Recent Philosophy: Two Dogmas of Empiricism}, + author = {Willard Van Orman Quine}, + journal = {The Philosophical Review}, + number = {1}, + pages = {20--43}, + publisher = {Duke University Press}, + volume = {60}, + issn = {00318108, 15581470}, + year = {1951}, + url = {http://www.jstor.org/stable/2181906} +} + +@book{quine_two_dogma_fr, + title = {Du point de vue logique : neuf essais logico-philosophiques}, + author = {Willard Van Orman Quine}, + translator = {Sandra Laugier}, + publisher = {Vrin}, + year = {2004} +} + +@inproceedings{rellda, + title = {Structured Relation Discovery using Generative Models}, + author = {Limin Yao and Aria Haghighi and Sebastian Riedel and Andrew McCallum}, + booktitle = {Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing}, + month = {7}, + year = {2011}, + address = {Edinburgh, Scotland, \textsc{uk}}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/D11-1135}, + shortseries = {\textsc{emnlp}}, + pages = {1456--1466} +} + +@inproceedings{rellda_sense, + title = {Unsupervised Relation Discovery with Sense Disambiguation}, + author = {Limin Yao and Sebastian Riedel and Andrew McCallum}, + booktitle = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + month = {7}, + year = {2012}, + address = {Jeju Island, Korea}, + publisher = {Association for Computational Linguistics}, + url = 
{https://aclanthology.org/P12-1075}, + shortseries = {\textsc{acl}}, + pages = {712--720} +} + +@inproceedings{relu, + title = {Deep Sparse Rectifier Neural Networks}, + author = {Glorot, Xavier and Bordes, Antoine and Bengio, Yoshua}, + booktitle = {Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics}, + pages = {315--323}, + year = {2011}, + editor = {Gordon, Geoffrey and Dunson, David and Dudík, Miroslav}, + volume = {15}, + series = {Proceedings of Machine Learning Research}, + address = {Fort Lauderdale, \textsc{fl}, \textsc{usa}}, + eventdate = {2011-04-11/2011-04-13}, + url = {http://proceedings.mlr.press/v15/glorot11a.html} +} + +@inproceedings{rescal, + author = {Maximilian Nickel and Volker Tresp and Hans-Peter Kriegel}, + title = {A Three-Way Model for Collective Learning on Multi-Relational Data}, + booktitle = {Proceedings of the 28th International Conference on Machine Learning (\textsc{icml}-11)}, + shortseries = {\textsc{icml}}, + year = {2011}, + editor = {Lise Getoor and Tobias Scheffer}, + location = {Bellevue, \textsc{wa}, \textsc{usa}}, + isbn = {978-1-4503-0619-5}, + month = {6}, + publisher = {\textsc{acm}}, + address = {New York, \textsc{ny}, \textsc{usa}}, + pages= {809--816}, + url = {https://icml.cc/2011/papers/438_icmlpaper.pdf} +} + +@inproceedings{rgcn, + author = {Schlichtkrull, Michael and Kipf, Thomas N. and Bloem, Peter and van den Berg, Rianne and Titov, Ivan and Welling, Max}, + editor = {Gangemi, Aldo and Navigli, Roberto and Vidal, Maria-Esther and Hitzler, Pascal and Troncy, Raphaël and Hollink, Laura and Tordai, Anna and Alam, Mehwish}, + title = {Modeling Relational Data with Graph Convolutional Networks}, + booktitle = {The Semantic Web}, + year = {2018}, + publisher = {Springer International Publishing}, + address = {Cham}, + pages = {593--607}, + url = {https://arxiv.org/pdf/1703.06103.pdf}, + isbn = {978-3-319-93417-4} +} + +@article{ri, + author = {William M. Rand}, + title = {Objective Criteria for the Evaluation of Clustering Methods}, + journal = {Journal of the American Statistical Association}, + volume = {66}, + number = {336}, + pages = {846--850}, + year = {1971}, + publisher = {Taylor \& Francis}, + shortseries = {\textsc{jasa}}, + doi = {10.1080/01621459.1971.10482356} +} + +@inproceedings{rmvs, + title = {Semantic Compositionality through Recursive Matrix-Vector Spaces}, + author = {Socher, Richard and Huval, Brody and Manning, Christopher D. 
and Ng, Andrew Y.}, + booktitle = {Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning}, + month = {7}, + year = {2012}, + address = {Jeju Island, Korea}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/D12-1110}, + pages = {1201--1211} +} + + +@inproceedings{selfore, + title = {{S}elf\textsc{ore}: Self-supervised Relational Feature Learning for Open Relation Extraction}, + author = {Xuming Hu and Lijie Wen and Yusong Xu and Chenwei Zhang and Philip Yu}, + booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (\textsc{emnlp})}, + month = {11}, + year = {2020}, + address = {Online}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/2020.emnlp-main.299}, + doi = {10.18653/v1/2020.emnlp-main.299}, + shortseries = {\textsc{emnlp}}, + pages = {3673--3682} +} + +@online{selfsupervised, + author = {Yann LeCun and Ishan Misra}, + title = {Self-supervised learning: The dark matter of intelligence}, + date = {2021-03-04}, + url = {https://ai.facebook.com/blog/self-supervised-learning-the-dark-matter-of-intelligence}, + urldate = {2021-11-08} +} + +@inproceedings{semeval2010task8, + title = {{S}em{E}val-2010 Task 8: Multi-Way Classification of Semantic Relations between Pairs of Nominals}, + author = {Hendrickx, Iris and Kim, Su Nam and Kozareva, Zornitsa and Nakov, Preslav and Ó Séaghdha, Diarmuid and Padó, Sebastian and Pennacchiotti, Marco and Romano, Lorenza and Szpakowicz, Stan}, + booktitle = {Proceedings of the 5th International Workshop on Semantic Evaluation}, + month = {7}, + year = {2010}, + address = {Uppsala, Sweden}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/S10-1006}, + shortseries = {SemEval}, + pages = {33--38} +} + +@incollection{sep_meaning, + author = {Speaks, Jeff}, + title = {Theories of Meaning}, + booktitle = {The Stanford Encyclopedia of Philosophy}, + editor = {Edward N. Zalta}, + url = {https://plato.stanford.edu/archives/spr2021/entries/meaning/}, + year = {2021}, + edition = {Spring 2021}, + publisher = {Metaphysics Research Lab, Stanford University} +} + +@incollection{sep_medieval_categories, + author = {Gracia, Jorge and Newton, Lloyd}, + title = {{Medieval Theories of the Categories}}, + booktitle = {The Stanford Encyclopedia of Philosophy}, + editor = {Edward N. Zalta}, + url = {https://plato.stanford.edu/archives/win2016/entries/medieval-categories/}, + year = {2016}, + edition = {Winter 2016}, + publisher = {Metaphysics Research Lab, Stanford University} +} + +@incollection{sep_relations, + author = {MacBride, Fraser}, + title = {Relations}, + booktitle = {The Stanford Encyclopedia of Philosophy}, + editor = {Edward N. Zalta}, + url = {https://plato.stanford.edu/archives/win2020/entries/relations/}, + year = {2020}, + edition = {Winter 2020}, + publisher = {Metaphysics Research Lab, Stanford University} +} + +@incollection{shallow_parse_tree, + title={Parsing by chunks}, + author={Steven P. 
Abney}, + booktitle={Principle-based parsing}, + pages={257--278}, + year={1991}, + publisher={Springer} +} + +@misc{ship_of_theseus, + author = {prefix=the, family={British Museum}}, + title = {Ariadne waking on the shore of Naxos}, + date = {-0099/0100}, + origlocation = {Herculaneum}, + url = {https://www.britishmuseum.org/collection/image/254690001}, + addendum = {Wall painting from Herculaneum, Asset number: 254690001, Museum number: 1867,0508.1358} +} + +@inproceedings{sift, + title = {\textsc{bbn}: Description of the \textsc{sift} System as Used for \textsc{muc-7}}, + author = {Scott Miller and Michael Crystal and Heidi Fox and Lance Ramshaw and Richard Schwartz and Rebecca Stone and Ralph Weische\-del and {The Annotation Group}}, + booktitle = {Seventh Message Understanding Conference (\textsc{muc-7}): Proceedings of a Conference Held in Fairfax, Virginia, {A}pril 29 -- May 1, 1998}, + year = {1998}, + shortseries = {\textsc{muc}}, + url = {https://aclanthology.org/M98-1009} +} + +@inproceedings{sinkhorn, + author = {Cuturi, Marco}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {C. J. C. Burges and L. Bottou and M. Welling and Z. Ghahramani and K. Q. Weinberger}, + publisher = {Curran Associates, Inc.}, + title = {Sinkhorn Distances: Lightspeed Computation of Optimal Transport}, + url = {https://proceedings.neurips.cc/paper/2013/file/af21d0c97db2e27e13572cbf59eb343d-Paper.pdf}, + volume = {26}, + year = {2013} +} + +@inproceedings{snowball, + author = {Eugene Agichtein and Luis Gravano}, + title = {Snowball: Extracting Relations from Large Plain-Text Collections}, + year = {2000}, + isbn = {158113231X}, + publisher = {Association for Computing Machinery}, + address = {New York, \textsc{ny}, \textsc{usa}}, + url = {https://dl.acm.org/doi/pdf/10.1145/336597.336644}, + doi = {10.1145/336597.336644}, + booktitle = {Proceedings of the Fifth \textsc{acm} Conference on Digital Libraries}, + pages = {85--94}, + location = {San Antonio, Texas, \textsc{usa}}, + shortseries = {\textsc{dl}} +} + +@misc{span_prediction, + title = {Relation Classification as Two-way Span-Prediction}, + author = {Amir DN Cohen and Shachar Rosenman and Yoav Goldberg}, + year = {2021}, + eprint = {2010.04829}, + archivePrefix = {arXiv}, + primaryClass = {cs.CL}, + note = {Under review for \textsc{acl}~2022.}, + shortseries = {\emph{under review}}, + url = {https://arxiv.org/abs/2010.04829} +} + +@inproceedings{spotlight, + author = {Mendes, Pablo N. 
and Jakob, Max and García-Silva, Andrés and Bizer, Christian}, + title = {\textsc{db}pedia Spotlight: Shedding Light on the Web of Documents}, + year = {2011}, + isbn = {9781450306218}, + publisher = {Association for Computing Machinery}, + address = {New York, \textsc{ny}, \textsc{usa}}, + url = {https://dl.acm.org/doi/pdf/10.1145/2063518.2063519}, + doi = {10.1145/2063518.2063519}, + booktitle = {Proceedings of the 7th International Conference on Semantic Systems}, + pages = {1--8}, + location = {Graz, Austria}, + series = {I-Semantics '11} +} + +@article{statistical_methods, + title = {Statistical methods and linguistics}, + author = {Steven Abney}, + journal = {The balancing act: Combining symbolic and statistical approaches to language}, + pages = {1--26}, + year = {1996}, +} + +@article{svm, + title = {Support-vector networks}, + author = {Corinna Cortes and Vladimir Vapnik}, + journal = {Machine learning}, + volume = {20}, + number = {3}, + pages = {273--297}, + year = {1995}, + issn = {1573-0565}, + doi = {10.1007/BF00994018}, + publisher = {Springer} +} + +@inproceedings{syntactic_formatting, + author = {Naomi Sager}, + title = {Syntactic Formatting of Science Information}, + year = {1972}, + isbn = {9781450379137}, + publisher = {Association for Computing Machinery}, + address = {New York, \textsc{ny}, \textsc{usa}}, + doi = {10.1145/1480083.1480101}, + booktitle = {Proceedings of the December 5-7, 1972, Fall Joint Computer Conference, Part II}, + pages = {791--800}, + shortseries = {\textsc{afips}}, + location = {Anaheim, California}, + doi = {10.1145/1480083.1480101}, + url = {https://dl.acm.org/doi/pdf/10.1145/1480083.1480101}, +} + +@article{t5, + author = {Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. 
Liu}, + title = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer}, + journal = {Journal of Machine Learning Research}, + year = {2020}, + volume = {21}, + number = {140}, + pages = {1-67}, + shortseries = {\textsc{jmlr}}, + url = {http://jmlr.org/papers/v21/20-074.html} +} + +@article{tdnn, + title = {Phoneme recognition using time-delay neural networks}, + author = {Waibel, Alex and Hanazawa, Toshiyuki and Hinton, Geoffrey and Shikano, Kiyohiro and Lang, Kevin J}, + journal = {\textsc{ieee} Transactions on Acoustics, Speech, and Signal Processing}, + volume = {37}, + number = {3}, + pages = {328--339}, + year = {1989}, + publisher = {Institute of Electrical and Electronics Engineers} +} + +@inproceedings{textrunner_assessor, + author = {Doug Downey and Oren Etzioni and Stephen Soderland}, + title = {A probabilistic model of redundancy in information extraction}, + year = {2005}, + booktitle = {Proceedings of the 19th International Joint Conference on Artificial Intelligence}, + shortseries = {\textsc{ijcai}}, + pages = {1028--1033}, + url = {https://www.ijcai.org/Proceedings/05/Papers/1390.pdf} +} + +@inproceedings{textrunner_synonym, + title = {{T}ext{R}unner: Open Information Extraction on the Web}, + author = {Alexander Yates and Michele Banko and Matthew Broadhead and Michael Cafarella and Oren Etzioni and Stephen Soderland}, + booktitle = {Proceedings of Human Language Technologies: The Annual Conference of the North {A}merican Chapter of the Association for Computational Linguistics (\textsc{naacl}-\textsc{hlt})}, + month = {4}, + year = {2007}, + address = {Rochester, \textsc{ny}, \textsc{usa}}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/N07-4013}, + shortseries = {\textsc{naacl}}, + pages = {25--26} +} + +@inproceedings{textrunner_resolver, + title = {Unsupervised Resolution of Objects and Relations on the Web}, + author = {Alexander Yates and Oren Etzioni}, + booktitle = {Human Language Technologies 2007: The Conference of the North {A}merican Chapter of the Association for Computational Linguistics; Proceedings of the Main Conference}, + month = {4}, + year = {2007}, + address = {Rochester, New York}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/N07-1016}, + pages = {121--130} +} + +@inproceedings{text_to_text_re, + title = {Neural Relation Extraction for Knowledge Base Enrichment}, + author = {Trisedya, Bayu Distiawan and Weikum, Gerhard and Qi, Jianzhong and Zhang, Rui}, + booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics}, + month = {7}, + year = {2019}, + address = {Florence, Italy}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/P19-1023}, + doi = {10.18653/v1/P19-1023}, + pages = {229--240} +} + +@book{thermodynamique, + title={Thermodynamique}, + author={Henri Poincaré}, + year={1908}, + publisher={Gauthier-Villars} +} + +@thesis{these_boulanger, + title = {Contribution à l'étude des équations différentielles linéaires et homogènes intégrables algébriquement}, + author = {Auguste Boulanger}, + year = {1897}, + type = {Thèses de doctorat}, + publisher = {Gauthier-Villars} +} + +@misc{time_aware_re, + title = {Towards Time-Aware Distant Supervision for Relation Extraction}, + author = {Tianwen Jiang and Sendong Zhao and Jing Liu and Jin-Ge Yao and Ming Liu and Bing Qin and Ting Liu and Chin-Yew Lin}, + year = {2019}, + eprint = {1903.03289}, + 
archivePrefix = {arXiv}, + primaryClass = {cs.CL} +} + +@inproceedings{transe, + author = {Bordes, Antoine and Usunier, Nicolas and Garcia-Duran, Alberto and Weston, Jason and Yakhnenko, Oksana}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {C. J. C. Burges and L. Bottou and M. Welling and Z. Ghahramani and K. Q. Weinberger}, + shortseries = {\textsc{n}eur\textsc{ips}}, + publisher = {Curran Associates, Inc.}, + title = {Translating Embeddings for Modeling Multi-relational Data}, + url = {https://proceedings.neurips.cc/paper/2013/file/1cecc7a77928ca8133fa24680a88d2f9-Paper.pdf}, + volume = {26}, + year = {2013} +} + +@inproceedings{transformers, + author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, Łukasz and Polosukhin, Illia}, + booktitle = {Advances in Neural Information Processing Systems}, + shortseries = {\textsc{n}eur\textsc{ips}}, + editor = {I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett}, + publisher = {Curran Associates, Inc.}, + title = {Attention is All you Need}, + url = {https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf}, + volume = {30}, + year = {2017} +} + +@inproceedings{transh, + author = {Wang, Zhen and Zhang, Jianwen and Feng, Jianlin and Chen, Zheng}, + title = {Knowledge Graph Embedding by Translating on Hyperplanes}, + year = {2014}, + publisher = {\textsc{aaai} Press}, + booktitle = {Proceedings of the Twenty-Eighth \textsc{aaai} Conference on Artificial Intelligence}, + pages = {1112--1119}, + location = {Qu\'{e}bec City, Qu\'{e}bec, Canada}, + series = {\textsc{aaai}'14} +} + +@inproceedings{transr, + author = {Lin, Yankai and Liu, Zhiyuan and Sun, Maosong and Liu, Yang and Zhu, Xuan}, + title = {Learning Entity and Relation Embeddings for Knowledge Graph Completion}, + year = {2015}, + isbn = {0262511290}, + publisher = {\textsc{aaai} Press}, + booktitle = {Proceedings of the Twenty-Ninth \textsc{aaai} Conference on Artificial Intelligence}, + pages = {2181--2187}, + location = {Austin, Texas}, + shortseries = {\textsc{aaai}} +} + +@inproceedings{trex, + title = {\textsc{t-re}x: A Large Scale Alignment of Natural Language with Knowledge Base Triples}, + author = {Hady Elsahar and Pavlos Vougiouklis and Arslen Remaci and Christophe Gravier and Jonathon Hare and Frederique Laforest and Elena Simperl}, + booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (\textsc{lrec} 2018)}, + month = {5}, + year = {2018}, + address = {Miyazaki, Japan}, + publisher = {European Language Resources Association (\textsc{elra})}, + shortseries = {\textsc{lrec}}, + url = {https://aclanthology.org/L18-1544}, +} + +@article{turing_test, + author = {Alan Mathison Turing}, + title = {Computing Machinery and Intelligence}, + journal = {Mind}, + volume = {LIX}, + number = {236}, + pages = {433--460}, + year = {1950}, + month = {10}, + issn = {0026-4423}, + doi = {10.1093/mind/LIX.236.433}, + shortseries = {Mind}, + url = {https://academic.oup.com/mind/article-pdf/LIX/236/433/30123314/lix-236-433.pdf} +} + +@inproceedings{unified_nlp, + author = {Collobert, Ronan and Weston, Jason}, + title = {A unified architecture for natural language processing: deep neural networks with multitask learning}, + booktitle = {Proceedings of the 25th Annual International Conference on Machine Learning}, + shortseries = {\textsc{icml}}, + location = {Helsinki, 
Finland}, + editor = {McCallum, Andrew and Roweis, Sam}, + publisher = {Omnipress}, + year = {2008}, + pages = {160--167}, + doi = {10.1145/1390156.1390177}, + url = {https://dl.acm.org/doi/pdf/10.1145/1390156.1390177}, +} + +@article{universal_approximator_sigmoid, + title={Approximation by superpositions of a sigmoidal function}, + author={Cybenko, George}, + journal={Mathematics of control, signals and systems}, + volume={2}, + number={4}, + pages={303--314}, + year={1989}, + publisher={Springer} +} + +@article{universal_approximator_nonpolynomial, + title={Multilayer feedforward networks with a nonpolynomial activation function can approximate any function}, + author={Leshno, Moshe and Lin, Vladimir Ya and Pinkus, Allan and Schocken, Shimon}, + journal={Neural networks}, + volume={6}, + number={6}, + pages={861--867}, + year={1993}, + publisher={Elsevier} +} + +@inproceedings{universal_schemas, + title = {Relation Extraction with Matrix Factorization and Universal Schemas}, + author = {Sebastian Riedel and Limin Yao and Andrew McCallum and Benjamin Marlin}, + booktitle = {Proceedings of the 2013 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies}, + month = {6}, + year = {2013}, + address = {Atlanta, Georgia}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/N13-1008}, + shortseries = {\textsc{nacl}}, + pages = {74--84} +} + +@inproceedings{v-measure, + title = {{V}-Mea\-sure: A Conditional Entropy-Based External Cluster Evaluation Measure}, + author = {Andrew Rosenberg and Julia Hirschberg}, + booktitle = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (\textsc{emnlp}-\textsc{c}o\textsc{nll})}, + month = {6}, + year = {2007}, + address = {Prague, Czech Republic}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/D07-1043}, + shortseries = {\textsc{emnlp}}, + pages = {410--420} +} + +@inproceedings{vae, + author = {Diederik P. 
Kingma and Max Welling}, + editor = {Yoshua Bengio and Yann LeCun}, + title = {Auto-Encoding Variational Bayes}, + booktitle = {2nd International Conference on Learning Representations, \textsc{iclr} 2014, Banff, \textsc{ab}, Canada, April 14-16, 2014, Conference Track Proceedings}, + year = {2014}, + shortseries = {\textsc{iclr}}, + url = {http://arxiv.org/abs/1312.6114} +} + +@article{vae_re, + title = {Discrete-State Variational Autoencoders for Joint Discovery and Factorization of Relations}, + author = {Diego Marcheggiani and Ivan Titov}, + journal = {Transactions of the Association for Computational Linguistics}, + volume = {4}, + year = {2016}, + url = {https://aclanthology.org/Q16-1017}, + doi = {10.1162/tacl_a_00095}, + shortseries = {\textsc{tacl}}, + pages = {231--244} +} + +@inproceedings{vae_re2, + title = {Unsupervised Relation Extraction: A Variational Autoencoder Approach}, + author = {Yuan, Chenhan and Eldardiry, Hoda}, + booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, + month = {11}, + year = {2021}, + address = {Online and Punta Cana, Dominican Republic}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/2021.emnlp-main.147}, + doi = {10.18653/v1/2021.emnlp-main.147}, + pages = {1929--1938}, +} + +@article{voted_perceptron, + title = {Large margin classification using the perceptron algorithm}, + author = {Yoav Freund and Robert E. Schapire}, + journal = {Machine learning}, + volume = {37}, + number = {3}, + pages = {277--296}, + year = {1999}, + doi = {10.1023/A:1007662407062}, + issn = {1573-0565}, + publisher = {Springer} +} + +@misc{wavenet, + title = {WaveNet: A Generative Model for Raw Audio}, + author = {van den Oord, Aäron and Dieleman, Sander and Zen, Heiga and Simonyan, Karen and Vinyals, Oriol and Graves, Alex and Kalchbrenner, Nal and Senior, Andrew and Kavukcuoglu, Koray}, + year = {2016}, + eprint = {1609.03499}, + archivePrefix = {arXiv}, + primaryClass = {cs.SD} +} + +@book{weaving_the_web, + title={Weaving the Web: The original design and ultimate destiny of the World Wide Web by its inventor}, + author={Tim Berners-Lee}, + year={1999}, + publisher={Harper San Francisco} +} + +@article{weisfeiler-leman, + title = {The reduction of a graph to canonical form and the algebra which appears therein}, + author = {Weisfeiler, Boris and Leman, Andreĭ}, + journal = {\textsc{nti}, Series}, + volume = {2}, + number = {9}, + pages = {12--16}, + year = {1968}, + shortseries = {\textsc{nti}}, + url = {https://www.iti.zcu.cz/wl2018/pdf/wl_paper_translation.pdf} +} + +@inbook{weisfeiler-leman_complexity, + author = {Immerman, Neil and Lander, Eric}, + editor = {Selman, Alan L.}, + title = {Describing Graphs: A First-Order Approach to Graph Canonization}, + booktitle = {Complexity Theory Retrospective: In Honor of Juris Hartmanis on the Occasion of His Sixtieth Birthday, July 5, 1988}, + year = {1990}, + publisher = {Springer New York}, + address = {New York, \textsc{ny}, \textsc{usa}}, + pages = {59--81}, + isbn = {978-1-4612-4478-3}, + doi = {10.1007/978-1-4612-4478-3_5}, + url = {https://www.cs.yale.edu/publications/techreports/tr605.pdf} +} + + +@article{weisfeiler-leman_fail, + title = {An optimal lower bound on the number of variables for graph identification}, + author = {Cai, Jin-Yi and Fürer, Martin and Immerman, Neil}, + journal = {Combinatorica}, + volume = {12}, + number = {4}, + pages = {389--410}, + year = {1992}, + shortseries = {Combinatorica}, + publisher = 
{Springer}, + url = {https://people.cs.umass.edu/~immerman/pub/opt.pdf} +} + +@inproceedings{weisfeiler-leman_sparse, + author = {Morris, Christopher and Rattan, Gaurav and Mutzel, Petra}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {H. Larochelle and M. Ranzato and R. Hadsell and M. F. Balcan and H. Lin}, + pages = {21824--21840}, + publisher = {Curran Associates, Inc.}, + title = {Weisfeiler and Leman go sparse: Towards scalable higher-order graph embeddings}, + url = {https://proceedings.neurips.cc/paper/2020/file/f81dee42585b3814de199b2e88757f5c-Paper.pdf}, + volume = {33}, + year = {2020} +} + +@inproceedings{weisfeiler-leman_wasserstein, + author = {Togninalli, Matteo and Ghisu, Elisabetta and Llinares-López, Felipe and Rieck, Bastian and Borgwardt, Karsten}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alch\'{e}-Buc and E. Fox and R. Garnett}, + publisher = {Curran Associates, Inc.}, + title = {Wasserstein Weisfeiler-Lehman Graph Kernels}, + url = {https://proceedings.neurips.cc/paper/2019/file/73fed7fd472e502d8908794430511f4d-Paper.pdf}, + volume = {32}, + year = {2019} +} + +@article{wikidata, + author = {Vrandečić, Denny and Krötzsch, Markus}, + title = {Wikidata: A Free Collaborative Knowledgebase}, + year = {2014}, + publisher = {Association for Computing Machinery}, + address = {New York, \textsc{ny}, \textsc{usa}}, + volume = {57}, + number = {10}, + issn = {0001-0782}, + doi = {10.1145/2629489}, + journal = {Communications of the \textsc{acm}}, + month = {9}, + pages = {78--85}, + shortseries = {\textsc{cacm}}, + url = {https://dl.acm.org/doi/pdf/10.1145/2629489} +} + +@inproceedings{wmt2010, + title = {Findings of the 2010 Joint Workshop on Statistical Machine Translation and Metrics for Machine Translation}, + author = {Callison-Burch, Chris and Koehn, Philipp and Monz, Christof and Peterson, Kay and Przybocki, Mark and Zaidan, Omar}, + booktitle = {Proceedings of the Joint Fifth Workshop on Statistical Machine Translation and {M}etrics\textsc{matr}}, + month = {7}, + year = {2010}, + address = {Uppsala, Sweden}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/W10-1703}, + pages = {17--53}, +} + +@inproceedings{wmt2020, + title = {Findings of the 2020 Conference on Machine Translation (\textsc{wmt}20)}, + author = {Barrault, Lo{\"\i}c and Biesialska, Magdalena and Bojar, Ond{\v{r}}ej and Costa-juss{\`a}, Marta R.
and Federmann, Christian and Graham, Yvette and Grundkiewicz, Roman and Haddow, Barry and Huck, Matthias and Joanis, Eric and Kocmi, Tom and Koehn, Philipp and Lo, Chi-kiu and Ljube{\v{s}}i{\'c}, Nikola and Monz, Christof and Morishita, Makoto and Nagata, Masaaki and Nakazawa, Toshiaki and Pal, Santanu and Post, Matt and Zampieri, Marcos}, + booktitle = {Proceedings of the Fifth Conference on Machine Translation}, + month = {11}, + year = {2020}, + address = {Online}, + publisher = {Association for Computational Linguistics}, + url = {https://aclanthology.org/2020.wmt-1.1}, + pages = {1--55} +} + +@misc{word2vec, + title = {Efficient Estimation of Word Representations in Vector Space}, + author = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey}, + year = {2013}, + eprint = {1301.3781}, + archivePrefix = {arXiv}, + primaryClass = {cs.CL} +} + +@inproceedings{word2vec_follow-up, + author = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg S and Dean, Jeff}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {C. J. C. Burges and L. Bottou and M. Welling and Z. Ghahramani and K. Q. Weinberger}, + publisher = {Curran Associates, Inc.}, + title = {Distributed Representations of Words and Phrases and their Compositionality}, + shortseries = {\textsc{n}eur\textsc{ips}}, + url = {https://proceedings.neurips.cc/paper/2013/file/9aa42b31882ec039965f3c4923ce901b-Paper.pdf}, + volume = {26}, + year = {2013} +} + +@inproceedings{word2vec_pmi, + title = {Neural Word Embedding as Implicit Matrix Factorization}, + author = {Levy, Omer and Goldberg, Yoav}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {Z. Ghahramani and M. Welling and C. Cortes and N. Lawrence and K. Q. Weinberger}, + publisher = {Curran Associates, Inc.}, + shortseries = {\textsc{n}eur\textsc{ips}}, + url = {https://proceedings.neurips.cc/paper/2014/file/feab05aa91085b7a8012516bc3533958-Paper.pdf}, + volume = {27}, + year = {2014} +} + +@article{wordnet, + author = {Miller, George A.}, + title = {WordNet: A Lexical Database for English}, + year = {1995}, + publisher = {Association for Computing Machinery}, + address = {New York, \textsc{ny}, \textsc{usa}}, + volume = {38}, + number = {11}, + issn = {0001-0782}, + doi = {10.1145/219717.219748}, + journal = {Communications of the \textsc{acm}}, + month = {11}, + pages = {39--41} +} + +@inproceedings{xlm, + author = {Conneau, Alexis and Lample, Guillaume}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alché-Buc and E. Fox and R. Garnett}, + publisher = {Curran Associates, Inc.}, + title = {Cross-lingual Language Model Pretraining}, + url = {https://proceedings.neurips.cc/paper/2019/file/c04c19c2c2474dbf5f7ac4372c5b9af1-Paper.pdf}, + volume = {32}, + year = {2019} +} + +@inproceedings{xlnet, + author = {Yang, Zhilin and Dai, Zihang and Yang, Yiming and Carbonell, Jaime and Salakhutdinov, Russ R and Le, Quoc V}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alché-Buc and E. Fox and R. 
Garnett}, + publisher = {Curran Associates, Inc.}, + title = {\textsc{xln}et: Generalized Autoregressive Pretraining for Language Understanding}, + url = {https://proceedings.neurips.cc/paper/2019/file/dc6a7e655d7e5840e66733e9ee67cc69-Paper.pdf}, + volume = {32}, + year = {2019} +} diff --git a/thesis.cls b/thesis.cls @@ -0,0 +1,788 @@ +\NeedsTeXFormat{LaTeX2e} +\ProvidesClass{thesis}[2021/01/01 Local class] + +% Some colored output for ease of debugging +\directlua{tcolor=require("lib/terminal color")} + +\RequirePackage{etoolbox} % Because we are in the third millennium +\RequirePackage{expl3} % For LaTeX3 code +\RequirePackage{luacode} % Handling of lua code inside tex files +\RequirePackage{pgffor} % For \foreach loop +\RequirePackage{xparse} % For modern \NewDocumentCommand, etc +\RequirePackage{xkeyval} % For handling class options + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Common Error Help Texts %%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\NewDocumentCommand\thesis@warning{m}{\directlua{tcolor.warning("\luatexluaescapestring{#1}")}} + +\NewDocumentCommand\thesis@options@error{m}{ + \ClassError{thesis}{#1}{% + The options provided to the thesis class must explicitly contain one of `print'\MessageBreak + or `digital'. + }% +} + +\NewDocumentCommand\thesis@patch@error{m m}{ + \csname #1Error\endcsname{thesis}{#2}{% + You're most likely seeing this error because of modifications to the latex\MessageBreak + ecosystem which are not compatible with the thesis code source. To ensure the\MessageBreak + code is compiled correctly, use the texlive 2021 distribution as described in\MessageBreak + the README. Another option is to adapt the \protect\patchcmd\space responsible for this\MessageBreak + error, although this might prove more time consuming. 
+ }% +} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Class Options %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% print & digital +\newif\ifthesis@digital +\newif\ifthesis@print +\DeclareOptionX{digital}{\thesis@digitaltrue} +\DeclareOptionX{print}{\thesis@printtrue} + +% debug +\newcount\thesis@debug +\thesis@debug=0 +\DeclareOptionX{thesisdebug}{\thesis@debug=#1} + +% lineno +\newif\ifthesis@lineno +\thesis@linenofalse +\DeclareOptionX{lineno}{\thesis@linenotrue} + +% summary +\newif\ifthesissummary +\thesissummaryfalse +\DeclareOptionX{summary}{\thesissummarytrue} + +\ProcessOptionsX\relax + +% check print & digital consistency +\ifthesis@digital\ifthesis@print + \thesis@options@error{Can't enable both `digital' and `print' options at the same time} +\fi\fi +\ifthesis@digital\else\ifthesis@print\else + \thesis@options@error{Either `digital' or `print' must be chosen} +\fi\fi + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Class Setup %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\PassOptionsToClass{a4paper,10pt}{book} +\LoadClass{book} + +% Basic dependencies +\RequirePackage{fontspec} % For modern font interface +\RequirePackage[table]{xcolor} % For color handling +\RequirePackage{csquotes} % For context sensitive quotes +\RequirePackage{caption} % For custom float and caption style +\RequirePackage{booktabs} % For fancy \toprule &cie +\RequirePackage{marginnote} % For non-floating marginpar + +% A keywords definition command, used for PDF metadata and in the manuscript +\DeclareRobustCommand\keywords[1]{\gdef\@keywords{#1}} + +% Continue page numbering when switching to main matter +\patchcmd{\mainmatter}{\pagenumbering{arabic}}{\gdef\thepage{\@arabic\c@page}}{}{\ClassError{thesis}{Can't keep continuous folio, patch failed.}} + +% Simple space after period +\frenchspacing + +% Use smaller font inside the margin column and be more lenient to bad spacing +\NewDocumentCommand\marginsize{}{\fontsize\@viiipt{9.5}\selectfont\hbadness=7500} +\DeclareCaptionFont{marginsize}{\marginsize} + +%%%%%%%%%%%%%%%%%%%% +% Margin Paragraph % +%%%%%%%%%%%%%%%%%%%% +\let\oldmarginpar\marginpar +\RenewDocumentCommand\marginpar{+m}{\oldmarginpar{\marginsize{}\ignorespaces#1}} +\NewDocumentEnvironment{marginparagraph}{o +b} + {% + \IfNoValueTF{#1}{% + \marginpar{#2}% + }{% + \marginnote{#2}[#1]% + }% + }{} +\AfterEndEnvironment{marginparagraph}{\ignorespaces} + +% For marginnote package +\RenewDocumentCommand\marginfont{}{\marginsize{}} +\RenewDocumentCommand\raggedleftmarginnote{}{} +\RenewDocumentCommand\raggedrightmarginnote{}{} + +%%%%%%%%%%%%%%% +% Debug boxes % +%%%%%%%%%%%%%%% +% Mark overfull \hbox{}es +\ifnum\thesis@debug>0 + \setlength\overfullrule{2mm} +\fi + +% Display box construction +\ifnum\thesis@debug>1 + \RequirePackage{lua-visual-debug} +\fi + +%%%%%%%%%%%%%%% +% Page Layout % +%%%%%%%%%%%%%%% +% This instantiate two geometries: withmarginpar and withoutmarginpar +\directlua{require("lib/layout").set{ + twoside=\ifthesis@print true\else false\fi, + top="2cm", + mpwidth="5cm", + mpsep="5mm", + debug=\ifnum\thesis@debug>1 true\else false\fi}} + +%%%%%%%%%%% +% Headers % +%%%%%%%%%%% +\RequirePackage{fancyhdr} +\fancyhf{} +\RenewDocumentCommand\headrulewidth{}{0mm} +\RenewDocumentCommand\footrulewidth{}{0mm} + +\ifthesis@digital + \fancypagestyle{plain}{% + \fancyhf[HR]{\thepage}% + \fancyhf[HEL]{\ifthesissummary\rightmark\else\leftmark\fi}% + \fancyhf[HOL]{\rightmark}% + } 
+\else %print + \fancypagestyle{plain}{% + \fancyhf[HEL,HOR]{\thepage}% + \fancyhf[HER]{\ifthesissummary\rightmark\else\leftmark\fi}% + \fancyhf[HOL]{\rightmark}% + } +\fi +\pagestyle{plain} + +% Setup header content +\RenewDocumentCommand\chaptermark{m}{\markboth{\if@mainmatter\thechapter\ \fi#1}{}} +\RenewDocumentCommand\sectionmark{m}{\markright{\thesection\ #1}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Commands for switching geometry % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% marginparwidth + marginparsep +\NewDocumentCommand\margintotal{}{55mm} + +\NewDocumentCommand\withmarginpar{}{% + \loadgeometry{withmarginpar}% + \fancyheadoffset[R]{\margintotal}% fix fancyhdr + \edef\marginnotetextwidth{\the\textwidth}% fix marginnote + \setlength{\@sidenotes@extrawidth}{\margintotal}%fix sidenotes +} +\NewDocumentCommand\withoutmarginpar{}{% + \loadgeometry{withoutmarginpar}% + \fancyheadoffset[R]{0mm}% fix fancyhdr + \setlength{\@sidenotes@extrawidth}{0mm}%fix sidenotes +} + +% Temporary fullwidth environment +\NewDocumentEnvironment{fullwidth}{} + {\begin{adjustwidth}{}{-\margintotal}} + {\end{adjustwidth}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Add version information to every page % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\RequirePackage{eso-pic} % for AddToShipoutPictureFG +\directlua{draft_version = require("lib/draft version")} +\NewDocumentCommand\draftVersion{}{\directlua{draft_version.draft_version()}} +\ifnum\thesis@debug>0 + \AddToShipoutPictureFG{% + \AtPageLowerLeft{% + \hspace{2mm}% + \makebox[0pt][l]{% + \rotatebox{90}{% + \hspace{2mm}% + \color{black}\ttfamily\footnotesize % + draft \draftVersion}}}} +\fi + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Add line numbers to every pages % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\RequirePackage{siunitx} +\ifthesis@lineno + \newcount\thesis@lineno + \thesis@lineno=1 + \AddToShipoutPictureBG{% + \ifnum\value{page}>2\relax% + \AtPageUpperLeft{% + \ifthesis@print% + \ifodd\c@page% + \hspace{5mm}% + \else% + \hspace{19mm}% + \fi% + \else% + \hspace{7.5mm}% + \fi% + \normalfont\normalsize% + \begin{minipage}[t]{1cm}% + \begin{singlespace}% + \hbadness=10000% + \vskip 32mm% + \foreach \i in {1,...,54}{% + \makeatletter% + \textcolor{black!50}{\num[minimum-integer-digits=4]{\the\thesis@lineno}}\\% + \global\advance\thesis@lineno1\relax% + \makeatother% + }% + \end{singlespace}% + \end{minipage}% + }% + \fi% + } +\fi + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Report all moved marginpar at the end % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\directlua{moved_marginpar = require("lib/moved marginpar")} +\NewDocumentCommand\marginparmoved{m}{\directlua{moved_marginpar.declare(#1)}} +\patchcmd{\@addmarginpar} + {\@latex@warning@no@line {Marginpar on page \thepage\space moved}} + {\marginparmoved{\thepage}} + {} + {\thesis@warning{Patch failed, can't remove annoying `moved marginpar' messages.}} + +%%%%%%%%%% +% Titles % +%%%%%%%%%% +\RequirePackage{titlesec} +\newfontfamily\garamond{EB Garamond}[Ligatures=TeX] +\titleformat{\chapter}% command + [display]% shape + {\garamond\huge}% format + {\chaptertitlename\ \thechapter}% label + {7.5mm}% label-title separation + {\begin{fullwidth}\raggedright\Huge}% title before code + [\end{fullwidth}]% title after code +\titleformat*{\section}{\garamond\raggedright\LARGE} +\titleformat*{\subsection}{\garamond\Large} +\titleformat*{\subsubsection}{\garamond\large} + +% Number \subsubsection +\setcounter{secnumdepth}{3} +\setcounter{tocdepth}{3} + +\ifthesissummary + 
\RenewDocumentCommand\thesection{}{\@arabic\c@section} +\fi + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Fonts %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\RequirePackage{amsmath} +\RequirePackage{amssymb} +\RequirePackage{mathtools} +\RequirePackage[math-style=ISO,warnings-off={mathtools-colon,mathtools-overbracket}]{unicode-math} + +% Use the old \mathcal style +\DeclareMathAlphabet{\mathcal}{OMS}{cmsy}{m}{n} + +% Use the standard Latin Modern font for most things +\setmathfont{LatinModernMath} + +% Except for the fancy script style +\setmathfont[range=scr]{XITS Math} + +% Symbols with no glyph in Latin Modern +\setmathfont[range={\setminus}]{XITS Math} + +%%%%%%%%%%%%%%%%%%%%%%%%% +% Uppercase PDF strings % +%%%%%%%%%%%%%%%%%%%%%%%%% +\usepackage{letltxmacro} % For let'ing robust command +\LetLtxMacro{\oldtextsc}{\textsc} + +% Avoid hyperref redefinition +\AtBeginDocument{ + \patchcmd{\pdfstringdef}% + {\textsc}% + {\oldtextsc}% + {}% + {\thesis@patch@error{Class}{% + Patch failed, can't change \protect\pdfstringdef\space to fix \protect\textsc\MessageBreak + for uppercase PDF string.}}% +} + +\ExplSyntaxOn +\def\textsc#1{% + \texorpdfstring% + {{% + \ifdim\fontdimen1\font>0pt\slshape\fi% Italic small caps can appear in the bibliography but are not available in latin modern, replace them with slanted small caps. + \oldtextsc{#1}}}% + {\text_uppercase:n{#1}}% Use uppercase for PDF strings (e.g. in PDF bookmarks) where small caps should appear. +} +\ExplSyntaxOff + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Floats Handling %%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\RequirePackage{newfloat} % For defining new floats +\RequirePackage{tocloft} % For defining List of Illustrations +\RequirePackage[oneside]{sidenotes} % For margin floats + +\pretocmd{\sidenotetext}% + {\edef\@currentlabel{\thesidenote}}% + {}% + {\thesis@patch@error{Class}{Prepend failed, can't add label information to \string\sidenotetext.}}% + +% Add an \ignorespaces since a ~ is placed after the note number and we often start the note with a linebreak. 
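+% For instance (sketch), with this redefinition a note written as
+% \sidenote{
+%   Some remark.}
+% does not pick up a spurious space between the note number and ``Some''.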
+\RenewDocumentCommand\sidenote{o o +m}{% + \sidenotemark[#1]% + \sidenotetext[#1][#2]{\ignorespaces #3}% + \@sidenotes@multimarker% +} + +%%%%%%%%%%%%%%%%%%% +% Algorithm float % +%%%%%%%%%%%%%%%%%%% + +% List of Algorithms +\NewDocumentCommand\listalgorithmname{}{List of Algorithms} +\newlistof[chapter]{algorithm}{loa}{\listalgorithmname} +% Use the same spacing than the one of List of Figures/Tables +\setlength{\cftalgorithmnumwidth}{2.3em} +\setlength{\cftalgorithmindent}{1.5em} + +% Create the standard environment +\DeclareFloatingEnvironment[fileext=loa,name=Algorithm]{algorithm} + +% Create the margin environment +\newsavebox{\@sidenotes@marginalgorithmbox} +\DeclareCaptionStyle{marginalgorithm}{font=marginsize} +\NewDocumentEnvironment{marginalgorithm}{o} + {% + \begin{lrbox}{\@sidenotes@marginalgorithmbox}% + \begin{minipage}{\marginparwidth}% + \captionsetup{type=algorithm,style=marginalgorithm}% + } + {% + \end{minipage}% + \end{lrbox}% + \@sidenotes@placemarginal{#1}{\usebox{\@sidenotes@marginalgorithmbox}}% + } + +%%%%%%%%%%%%%%%%%%%%% +% Margin Appearance % +%%%%%%%%%%%%%%%%%%%%% +% Use the smaller font inside margins +\captionsetup{font=marginsize} +\AtBeginEnvironment{marginfigure}{\marginsize} +\AtBeginEnvironment{margintable}{\marginsize} +\AtBeginEnvironment{marginalgorithm}{\marginsize} + +% Smaller spacing between figure and caption +\setlength{\abovecaptionskip}{2mm} + +% Modify marginnote package to always print on the right +\patchcmd{\@mn@@@marginnote} + {\if@tempswa\rlap} + {\iftrue\rlap} + {} + {\thesis@patch@error{Class}{% + Patch failed, can't fix \protect\marginnote\space such that it always\MessageBreak + places on the right.}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Chapter handling for TOC & LOI % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\NewDocumentCommand\fixloititlefont{m}{ + \expandafter\gdef\csname cft#1titlefont\endcsname{\garamond\Huge} +} + +\foreach \loi in {toc, lof, lot, loa}{ + \expandafter\fixloititlefont\expandafter{\loi} +} + +\AddToHookNext{begindocument}{ + \pretocmd{\tableofcontents} + {\cleardoublepage\pdfbookmark[chapter]{\contentsname}{contents}} + {} + {\thesis@patch@error{Class}{Prepend failed, can't \string\cleardoublepage\space before \string\tableofcontents.}} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Lower case for special headers % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\NewDocumentCommand\MakeMarkcase{}{} % For biblatex +\NewDocumentCommand\thesis@lower@head{m m}{% Patch command for frontmatter lists + \RenewDocumentCommand{#1}{}{\@mkboth{#2}{#2}}% +} +\thesis@lower@head{\cftmarktoc}{\contentsname} +\thesis@lower@head{\cftmarklof}{\listfigurename} +\thesis@lower@head{\cftmarklot}{\listtablename} +\thesis@lower@head{\cftmarkloa}{\listalgorithmname} + +%%%%%%%%%%%%%%%%%%%%%%%% +% Algorithm formatting % +%%%%%%%%%%%%%%%%%%%%%%%% +\RequirePackage{algpseudocodex} +\NewDocumentCommand\FunctionOutput{}{\State\textsl{Output}:} +\NewDocumentCommand\FunctionOutputs{s}{% + \IfBooleanTF#1% + {\State\hphantom{\textsl{Outputs}:}}% + {\State\textsl{Outputs}:}% +} +\NewDocumentCommand\FunctionInput{}{\State\textsl{Input}:} +\NewDocumentCommand\FunctionInputs{s}{% + \IfBooleanTF#1% + {\State\hphantom{\textsl{Inputs}:}}% + {\State\textsl{Inputs}:}% +} +\RenewDocumentCommand\algorithmicfunction{}{\textbf{algorithm}} + +% Remove left margin in algorithms +\expandafter\patchcmd\csname\string\algorithmic\endcsname + {\leftmargin\labelwidth \addtolength{\leftmargin}{\labelsep}} + {\leftmargin 0mm} + {} + {\thesis@patch@error{Class}{Patch failed, can't 
remove left margin of \protect\algorithmic.}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Universal Caption Command % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Store the type of float (m=main area, s=side margin, w=wide) +\def\thesis@float@type{} + +% Modify \thesis@float@type to match the environment +\NewDocumentCommand\thesis@patch@float@type{m m}{% + \expandafter\pretocmd\csname#1\endcsname{\ifx\thesis@float@type\empty\def\thesis@float@type{#2}\fi}{}{\thesis@patch@error{Class}{Prepend failed, can't add float type information to #1.}}% + \expandafter\pretocmd\csname end#1\endcsname{\def\thesis@float@type{}}{}{\thesis@patch@error{Class}{Prepend failed, can't reinitialize float type information after #1.}}% +} +\thesis@patch@float@type{figure}{m} +\thesis@patch@float@type{figure*}{w} +\thesis@patch@float@type{marginfigure}{s} +\thesis@patch@float@type{table}{m} +\thesis@patch@float@type{table*}{w} +\thesis@patch@float@type{margintable}{s} +\thesis@patch@float@type{algorithm}{m} +\thesis@patch@float@type{marginalgorithm}{s} + +% Caption box for main area floats +\newsavebox{\thesis@caption@box} + +% Universal caption command: +% For side margin floats, use standard \caption +% For wide floats, use sidenotes' \sidecaption +% For main area floats, align the bottom of the caption with the bottom of the figure, except if a star is given, then the tops are aligned. +\NewDocumentCommand\scaption{s O{#3} m O{0mm}}{% + \if m\thesis@float@type\relax% main area float + \begin{lrbox}{\thesis@caption@box}% + \begin{minipage}{\marginparwidth}% + \caption[#2]{#3}% + \end{minipage}% + \end{lrbox}% + \IfBooleanTF#1% + {\def\thesis@caption@offset{0mm}}% + {\def\thesis@caption@offset{-\ht\thesis@caption@box-\dp\thesis@caption@box}}% + \marginnote{\usebox{\thesis@caption@box}}[\dimexpr\thesis@caption@offset+#4\relax]% + \else% + \if s\thesis@float@type\relax% side margin float + \caption[#2]{#3}% + \else% wide float + \sidecaption[#2][#4]{#3}% + \fi% + \fi% +} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Bibliography %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\RequirePackage[ + style=authoryear-comp, + maxcitenames=2, + mincitenames=1, + maxbibnames=10, + minbibnames=9, + uniquelist=false, + dateera=secular, + dateeraauto=500, + dateuncertain=true + ]{biblatex} + +% Homogenize formatting of @misc title +\DeclareFieldFormat[misc]{title}{\mkbibquote{#1}} + +% Remove leading zeros for years earlier than 1000 +\let\blx@imc@mkyearzeros\blx@imc@stripzeros + +% Make prefix appear first in citation (de Saussure 1916) but not in the bibliography (Saussure, Ferdinand de…) +\AtBeginDocument{\toggletrue{blx@useprefix}} +\AtBeginBibliography{\togglefalse{blx@useprefix}} + +% Prefer smallcaps for era abbreviations +\DefineBibliographyStrings{english}{ + commonera = {\textsc{ce}}, + beforecommonera = {\textsc{bce}}, +} +\DefineBibliographyStrings{french}{ + commonera = {de~n.~è.}, + beforecommonera = {av.~n.~è.}, +} + +% Break URLs anywhere in bibliography +\setcounter{biburllcpenalty}{100} +\setcounter{biburlucpenalty}{200} +\setcounter{biburlnumpenalty}{100} + +%%%%%%%%%%%%%%%%%%%%%%%% +% References in margin % +%%%%%%%%%%%%%%%%%%%%%%%% +\DeclareFieldFormat{linkedtitle}{% + \iffieldundef{url}% + {% + \printfield{\currentfield}% + \thesis@warning{No url provided for \string\sidecited\space``\thefield{title}''}{}% + }% + {\mkbibquote{\href{\thefield{url}}{#1}\isdot}}% +} +\newbibmacro{shortseries}{\printfield{shortseries}} + +\NewDocumentCommand\citationBadness{}{\hbadness=7500}
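+% Intended use (sketch): \sidecite{transe} typesets the full reference (author
+% names, hyperlinked title, venue acronym from the shortseries field, and year)
+% in the margin; an optional offset, e.g. \sidecite{transe}[-5mm], nudges it vertically.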
+ +\DeclareCiteCommand{\sideciteContent} + {\citationBadness\usebibmacro{prenote}}% precode + {% loopcode + \printnames{labelname}% + \setunit{\printdelim{nametitledelim}}\newblock% + \printfield[linkedtitle]{title}% + \setunit{\addspace}% + \usebibmacro{shortseries}% + \setunit{\addspace}% + \printfield[bibhyperref]{year}% + }% end loopcode + {\par}% sepcode + {\usebibmacro{postnote}}% postcode + +\NewDocumentCommand\sidecite{m o}{% + \IfNoValueTF{#2}{% + \marginpar{\sideciteContent{#1}}% + }{% else if #2 is given + \marginnote{\sideciteContent{#1}}[#2]% + }% +} + +\NewDocumentCommand\thesis@new@cite{m}{% + \expandafter\NewDocumentCommand\csname #1x\endcsname{O{} O{} m o}{% + \csname #1\endcsname[##1][##2]{##3}% + \sidecite{##3}[##4]% + }% +} + +\thesis@new@cite{cite} +\thesis@new@cite{Cite} +\thesis@new@cite{textcite} +\thesis@new@cite{Textcite} +\thesis@new@cite{parencite} +\thesis@new@cite{Parencite} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Figure Drawing %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%% +% PGF/TikZ % +%%%%%%%%%%%% +\RequirePackage{tikz} +\usetikzlibrary{arrows.meta} % for more arrow shapes +\usetikzlibrary{backgrounds} % for drawing behind things +\usetikzlibrary{calc} % for let +\usetikzlibrary{colorbrewer} % for color schemes +\usetikzlibrary{decorations.pathreplacing} % for decorate +\usetikzlibrary{decorations.text} % for text along path +\usetikzlibrary{matrix} % for matrix of nodes +\usetikzlibrary{patterns} % for pattern +\usetikzlibrary{positioning} % for above=of +\usetikzlibrary{shapes.geometric} % for regular polygon +\usetikzlibrary{svg.path} + +% Use plain arrows +\tikzset{arrow/.style={-{Latex}}} + +% Consistent transparency values for figures +\def\transparencyDefault{0.3} +\def\transparencyLow{0.1} +\tikzset{faded/.style={opacity=\transparencyDefault}} +\tikzset{ultra faded/.style={opacity=\transparencyLow}} + +%%%%%%%%%%%%% +% PGF plots % +%%%%%%%%%%%%% +\RequirePackage{pgfplots} +\pgfplotsset{compat=1.17} + +% Use Tufte-inspired style +\pgfplotsset{ + modern/.style={ + enlargelimits=false, + separate axis lines, + semithick, + axis x line*=bottom, + axis x line shift=10pt, + axis y line*=left, + axis y line shift=10pt, + every axis/.append style={thick}, + tick style={thick, black}, + tick align=outside, + } +} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Block Formatting %%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\NewDocumentEnvironment{spacedblock}{} + {\begin{trivlist}\item\relax} + {\end{trivlist}} + +\NewDocumentEnvironment{indentedexample}{} + {% + \begin{spacedblock}% + \hfill% + \begin{minipage}{\dimexpr\textwidth-2cm}% + } + {% + \end{minipage}% + \hfill\null% + \end{spacedblock}% + } + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Epigraph %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\RequirePackage{adjustbox} + +% For vertically flowing text (e.g. 
chinese) +\newsavebox{\thesis@tate@box} +\NewDocumentCommand\tate{m}{% + \begin{lrbox}{\thesis@tate@box}% + #1% + \end{lrbox}% + \boxdir\thesis@tate@box RTT% + \usebox{\thesis@tate@box}% +} + +% For a double quote character I like +\newfontfamily\bonum{TeX Gyre Bonum}[Ligatures=TeX] + +\NewDocumentEnvironment{epigraphcontent}{O{4cm} m m m +b} + {% + \textcolor{black!30}{\bonum\fontsize{36pt}{36pt}\selectfont\raisebox{-16pt}[8pt][0pt]{``}}% + \itshape\,\ignorespaces #5\strut\\% + \upshape\null\hfill% + \begin{minipage}[b]{#1}% + \vphantom{---}\llap{--- }% + \if\relax\detokenize{#2}\relax\else% + #2, % + \fi% + #3 (#4)\strut% + \end{minipage}% + }{} + +\NewDocumentEnvironment{translatedepigraphcontent}{O{4cm} m m m m +b} + {% + \textcolor{black!30}{\bonum\fontsize{36pt}{36pt}\selectfont\raisebox{-16pt}[8pt][0pt]{``}}% + \itshape\,\ignorespaces #6\strut\\% + \upshape\textcolor{black!30}{\bonum\fontsize{36pt}{36pt}\selectfont\raisebox{-16pt}[8pt][0pt]{``}}% + \itshape\,\ignorespaces #5\strut\\% + \upshape\null\hfill% + \begin{minipage}[b]{#1}% + \vphantom{---}\llap{--- }% + \if\relax\detokenize{#2}\relax\else% + #2, % + \fi% + #3 (#4)\strut% + \end{minipage}% + }{} + +\newlength{\thesis@epigraph@dash@width} +\settowidth{\thesis@epigraph@dash@width}{--- } +\NewDocumentEnvironment{epigraph}{O{4cm} m m m o o +b} + {% + \begin{marginparagraph}[#6]% + \raisebox{-\baselineskip}[0mm][\totalheight]{\parbox[t]{\marginparwidth}{% + \hbadness=7000% + \begin{epigraphcontent}[#1]{#2}{#3}{#4}% + #7% + \end{epigraphcontent}% + \IfValueT{#5}% + {\newline\null\hfill\begin{minipage}{\dimexpr#1+\thesis@epigraph@dash@width\relax}% + #5% + \end{minipage}}% + }}% + \end{marginparagraph}% + }{} + +\NewDocumentEnvironment{translatedepigraph}{O{4cm} m m m m o +b} + {% + \begin{marginparagraph}[#6]% + \raisebox{-\baselineskip}[0mm][\totalheight]{\parbox[t]{\marginparwidth}{% + \hbadness=7000% + \begin{translatedepigraphcontent}[#1]{#2}{#3}{#4}{#5}% + #7% + \end{translatedepigraphcontent}% + }}% + \end{marginparagraph}% + }{} + +% Fix problem with luatex vertical typesetting +\newlength{\thesis@tate@fix@horizontal} +\newlength{\thesis@tate@fix@vertical@height} +\newlength{\thesis@tate@fix@vertical@delta} +\NewDocumentCommand\tatefix{m m m}{% + \setlength{\thesis@tate@fix@horizontal}{#1}% + \setlength{\thesis@tate@fix@vertical@height}{#2}% + \setlength{\thesis@tate@fix@vertical@delta}{#3}% +} + +\NewDocumentEnvironment{cjkepigraphcontent}{O{} +b} + {% + \lapbox[\width]{\thesis@tate@fix@horizontal}{% + \begin{minipage}[b]{5mm}% + \raisebox{-\thesis@tate@fix@vertical@delta}[\dimexpr\totalheight-\thesis@tate@fix@vertical@height\relax][0mm]{\tate{#1{「#2」}}}% + \end{minipage}% + }% + \tatefix{0mm}{0mm}{0mm}% + }{} + +\NewDocumentEnvironment{cjkepigraph}{O{} m m o +b} + {% + \begin{marginparagraph}% + \begin{minipage}[b]{#2} + #3 + \end{minipage}% + \begin{cjkepigraphcontent}[#1]% + #5% + \end{cjkepigraphcontent}% + \IfValueT{#4}% + {\newline\null\hfill\begin{minipage}{#2}% + \hbadness=7000% + #4% + \end{minipage}}% + \end{marginparagraph}% + }{} diff --git a/thesis.sty b/thesis.sty @@ -0,0 +1,390 @@ +\NeedsTeXFormat{LaTeX2e} +\ProvidesPackage{thesis}[2021/01/01 Local package] + +\RequirePackage[main=english,french,greek]{babel} +\RequirePackage{iflang} % For \IfLanguageName +\RequirePackage{lua-ul} % For \underLine +\RequirePackage{ccicons} % For Creative Commons licence logos +\RequirePackage{setspace} % For \onehalfspacing +\RequirePackage[inline]{enumitem} % For fancy itemize etc +\RequirePackage{array} % For 
\newcolumntype +\RequirePackage{tabularx} % For X-like column types +\RequirePackage{longtable} % For pagebreak inside a table +\RequirePackage{multirow} % For \multirow and \multicolumn +\RequirePackage{graphicx} % For \includegraphics on steroids +\RequirePackage{hyperref} % For hypertext functionalities +\RequirePackage{bookmark} % For \pdfbookmark + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Hyperref setup %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%% +% Set PDF metadata % +%%%%%%%%%%%%%%%%%%%% +\begingroup +\def\sep{;} +\edef\thesis@pdf@keywords{\@keywords} + +\hypersetup{ + pdfauthor={\@author}, + pdftitle={\@title}, + pdfsubject={\@title}, + pdfkeywords={\thesis@pdf@keywords}, + pdflang=en, +} +\endgroup + +%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Hyperlink configuration % +%%%%%%%%%%%%%%%%%%%%%%%%%%% +\hypersetup{ + linktoc=all, + colorlinks=true, + linkcolor=red!60!black, + citecolor=green!60!black, + filecolor=cyan!60!black, + menucolor=red!60!black, + urlcolor=magenta!60!black, + pdfdisplaydoctitle=true, +} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Shorthand commands %%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%% +% Draft Annotations % +%%%%%%%%%%%%%%%%%%%%% +\NewDocumentCommand\draftnotecmd{m O{} O{} o m m}{ + \ifnum\thesis@debug>0 + \expandafter\NewDocumentCommand\csname\string#1\endcsname{+m}{\hbadness=10000\textcolor{#5}{#2\relax{}##1}} + \NewDocumentCommand#1{o +m}{\IfNoValueTF{##1}{\IfValueTF{#4}{#4\csname\string#1\endcsname{##2}}{\marginpar{\tiny\csname\string#1\endcsname{##2}}}}{\marginnote{\tiny\csname\string#1\endcsname{##2}}[##1]}\ignorespaces} + \else + \NewDocumentCommand#1{o +m}{\directlua{print("\string\27[#6;1mDRAFT MESSAGE: #3\luatexluaescapestring{\unexpanded{##1}}\string\27[0m")}} + \fi +} + +\draftnotecmd\B{red}{31} +\draftnotecmd\benj[][][]{red}{31} +\draftnotecmd\Ba[\(\hookrightarrow\)][→]{red}{31} +\draftnotecmd\V{green!67!black}{32} +\draftnotecmd\vinc[][][]{green!67!black}{32} +\draftnotecmd\R{orange}{33} +\draftnotecmd\reu[][][]{orange}{33} +\draftnotecmd\E{blue}{34} +\draftnotecmd\e[][][]{blue}{34} +\draftnotecmd\Ea[\(\hookrightarrow\)][→]{blue}{34} +\draftnotecmd\M{purple}{35} +\draftnotecmd\syr[][][]{purple}{35} + +%%%%%%%%%%%%%%%%%% +% Table commands % +%%%%%%%%%%%%%%%%%% +\newcolumntype{P}[1]{>{\footnotesize\raggedright\arraybackslash}p{#1}} +\newcolumntype{Y}{>{\footnotesize\raggedright\arraybackslash}X} + +% won't compile on a PDP-50… O: +\setcounter{LTchunksize}{100} + +%%%%%%%%%%%%%%%%% +% Math commands % +%%%%%%%%%%%%%%%%% +\DeclareMathOperator*\expectation{\symbb{E}} +\DeclareMathOperator*\entropy{H} +\DeclareMathOperator*\argmax{argmax} +\DeclareMathOperator\pmi{pmi} +\NewDocumentCommand\laplace{}{\symup{\Delta}} +\DeclareMathOperator\ReLU{ReLU} +\DeclareMathOperator\softmax{softmax} +\DeclareMathOperator\diagonal{diag} +\DeclareMathOperator\kl{D_\textsc{kl}} +\DeclareMathOperator\jsd{D_\textsc{jsd}} +\DeclareMathOperator\uniformDistribution{\symcal{U}} +\DeclareMathOperator\normalDistribution{\mathcal{N}} +\NewDocumentCommand\sigmoid{}{\mathop{\sigma}} +\NewDocumentCommand\diff{}{\mathop{}\!\mathrm{d}} + +% Independent symbol with two vertical bar +\newcommand\independent{\protect\mathpalette{\protect\independenT}{\perp}} +\def\independenT#1#2{\mathrel{\rlap{$#1#2$}\mkern2mu{#1#2}}} + +\NewDocumentCommand\middlerel{m}{\mathrel{}\middle#1\mathrel{}} + +% For multisets 
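+% (sketch: in math mode, \lMultiBrace a, a, b \rMultiBrace should typeset the
+% multiset with doubled braces, the inner pair pulled tight against the outer one)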
+\NewDocumentCommand\lMultiBrace{}{\left\{\mskip-7mu\left\{} +\NewDocumentCommand\rMultiBrace{}{\right\}\mskip-7mu\right\}} + +\newcommand\notindependent{\protect\mathpalette{\protect\notindependenT}{\perp}} +\def\notindependenT#1#2{\mathrel{\rlap{$#1#2$}\mkern2mu{#1\not#2}}} +% Comply with ISO 80000-2 (which mandates bold italic or \vec for vectors) +\NewDocumentCommand\vctr{m}{{\symbf{#1}}} +\NewDocumentCommand\mtrx{m}{{\symbf{#1}}} +\NewDocumentCommand\tnsr{m}{{\symbfsf{#1}}} +\NewDocumentCommand\rndm{m}{{\symup{#1}}} +\NewDocumentCommand\rndmvctr{m}{{\symbfup{#1}}} +\NewDocumentCommand\transposesym{}{\symsfup{T}} +\NewDocumentCommand\transpose{}{^{\mkern-1.5mu\transposesym}} + +% Used in subscript of \ReLU to indicates it is only applied to half the units +\NewDocumentCommand\halfCircleScript{}{ + \begin{tikzpicture} + \draw[fill] (0,0)-- (90:0.8mm) arc (90:270:0.8mm) -- cycle ; + \draw (0,0) circle (0.8mm); + \end{tikzpicture} +} + +%%%%%%%%%%%%%%%%%%%%%%%% +% Entities & Relations % +%%%%%%%%%%%%%%%%%%%%%%%% +\NewDocumentCommand\uhead{m}{\relax\underLine{#1}\textsubscript{\(e_1\)}} +\NewDocumentCommand\utail{m}{\relax\underLine{#1}\textsubscript{\(e_2\)}} +\NewDocumentCommand\uent{m}{\relax\underLine{#1}\textsubscript{\(e\)}} +\NewDocumentCommand\wdent{O{} m}{\texttt{\href{https://www.wikidata.org/wiki/Q#2}{#1Q#2}}} +\NewDocumentCommand\wdrel{O{} m}{\texttt{\href{https://www.wikidata.org/wiki/Property:P#2}{#1P#2}}} + +\NewDocumentCommand\tripletHolds{m m m}{\ensuremath{\mathalpha{#1}\mathrel{#2}\mathalpha{#3}}} +\NewDocumentCommand\sfTripletHolds{m m m}{\tripletHolds{#1}{\textsl{#2\/}}{#3}} + +%%%%%%%%%%%%%%% +% Common Sets % +%%%%%%%%%%%%%%% +\NewDocumentCommand\entitySet{}{{\symcal{E}}} +\NewDocumentCommand\relationSet{}{{\symcal{R}}} +\NewDocumentCommand\sentenceSet{}{{\symcal{S}}} +\NewDocumentCommand\dataSet{}{{\symcal{D}}} +\NewDocumentCommand\arcSet{}{{\symcal{A}}} +\NewDocumentCommand\kbSet{}{{\dataSet_\textup{\textsc{kb}}}} +\NewDocumentCommand\itemSet{}{{\symcal{I}}} + +%%%%%%%%%%%%%%%%%%%%%%% +% Directed F1 Metrics % +%%%%%%%%%%%%%%%%%%%%%%% +\def\widebreve{\mathpalette\wide@breve} +\def\wide@breve#1#2{\sbox\z@{$#1#2$}% + \mathop{\vbox{% + \m@th\ialign{% + ##\crcr + \kern0.08em\brevefill#1{0.9\wd\z@}\crcr% + \noalign{\nointerlineskip\vskip -0.6mm\relax}% + $\hss#1#2\hss$\crcr% + }}}\limits} +\def\Pwidebreve{\mathpalette\Pwide@breve} +\def\Pwide@breve#1#2{\sbox\z@{$#1#2$}% + \mathop{\vbox{% + \m@th\ialign{% + ##\crcr + \kern0.08em\brevefill#1{0.9\wd\z@}\crcr% + \noalign{\nointerlineskip}% + $\hss#1#2\hss$\crcr% + }}}\limits} +\def\brevefill#1#2{$\m@th\sbox\tw@{$#1($}% + \hss\resizebox{#2}{\wd\tw@}{\rotatebox[origin=c]{90}{\upshape(}}\hss$} + +%%%%%%%%% +% Graph % +%%%%%%%%% +\NewDocumentCommand\gfsource{}{\varepsilon_1} +\NewDocumentCommand\gftarget{}{\varepsilon_2} +\NewDocumentCommand\gfendpoints{}{\symbf{\varepsilon}} +\DeclareMathOperator\gfincidents{\symcal{I}} +\NewDocumentCommand\gfrelation{}{\mathop{\rho}} +\NewDocumentCommand\gfsentence{}{\mathop{\varsigma}} +\DeclareMathOperator\gfdegree{deg} +\DeclareMathOperator\gffourier{\symscr{F}} +\DeclareMathOperator\gfinvfourier{\symscr{F}^{-1}} +\NewDocumentCommand\laplacian{m}{\ensuremath{\mtrx{L}_{\textsc{#1}}}} +\NewDocumentCommand\inlineArc{s o m m}{% + \begin{tikzpicture}[baseline=(a.base)]% + \node[inner sep=0.3mm, outer sep=0mm] (a) {\(#3\)}; + \node[right=4mm of a,inner sep=0.2mm, outer sep=0mm] (b) {\(#4\)}; + \draw[-{Latex[length=1.5mm,width=1.2mm]}]% + (\IfBooleanTF{#1}{b}{a})% + --% + (\IfBooleanTF{#1}{a}{b})% 
+ \IfValueT{#2}{node[midway,above] {\scriptsize \(#2\)}}% + ; + \end{tikzpicture}% +} +\NewDocumentCommand\inlineDoubleArc{s o o m m m}{% + \begin{tikzpicture}[baseline=(a.base)]% + \node[inner sep=0.3mm, outer sep=0mm] (a) {\(#4\)}; + \node[right=4mm of a,inner sep=0.2mm, outer sep=0mm] (b) {\(#5\)}; + \node[right=4mm of b,inner sep=0.2mm, outer sep=0mm] (c) {\(#6\)}; + \draw[-{Latex[length=1.5mm,width=1.2mm]}] (a) -- (b)% + \IfValueT{#2}{node[midway,above] {\scriptsize \(#2\)}}% + ; + \draw[-{Latex[length=1.5mm,width=1.2mm]}]% + (\IfBooleanTF{#1}{c}{b})% + --% + (\IfBooleanTF{#1}{b}{c})% + \IfValueT{#3}{node[midway,above] {\scriptsize \(#3\)}}% + ; + \end{tikzpicture}% +} + +%%%%%%%%%%%%%%%% +% Neighborhood % +%%%%%%%%%%%%%%%% +\NewDocumentCommand\gfneighbors{}{N} +\NewDocumentCommand\gfeneighbors{}{\symcal{N}} +\NewDocumentCommand\gfsr{}{{\langle r\rangle}} +\NewDocumentCommand\gforight{}{{\tikz{\draw[-{Latex[length=1mm,width=1mm]}] (0, 0) -- (2.6mm, 0);}}} +\NewDocumentCommand\gfoleft{}{{\tikz{\draw[-{Latex[length=1mm,width=1mm]}] (0, 0) -- (-2.6mm, 0);}}} +\NewDocumentCommand\gfnright{}{{\kern-0.8mm\gforight}} +\NewDocumentCommand\gfnleft{}{{\kern-0.8mm\gfoleft}} + +\NewDocumentCommand\gfneighborsrr{}{% + \operatorname{\mathnormal{\begin{tikzpicture}[baseline=(b.base)]% + \node[inner sep=0pt,outer sep=0pt] (n) {\(\gfneighbors_\gfsr\)};% + \draw[-{Latex[length=1mm,width=1mm]}] ($(n.center)+(0.39mm, 1mm)$) -- +(2.6mm, 0);% + \node[inner sep=0pt,outer sep=0pt,anchor=north west] (b) at (n.north west) {\(\gfneighbors\)};% + \clip (n.north west) rectangle (n.south east);% + \end{tikzpicture}}}% +} +\NewDocumentCommand\gfneighborsrl{}{% + \operatorname{\mathnormal{\begin{tikzpicture}[baseline=(b.base)]% + \node[inner sep=0pt,outer sep=0pt] (n) {\(\gfneighbors_\gfsr\)};% + \draw[-{Latex[length=1mm,width=1mm]}] ($(n.center)+(2.99mm, 1mm)$) -- +(-2.6mm, 0);% + \node[inner sep=0pt,outer sep=0pt,anchor=north west] (b) at (n.north west) {\(\gfneighbors\)};% + \clip (n.north west) rectangle (n.south east);% + \end{tikzpicture}}}% +} + +%%%%%%%%%%%%%%%%%%%% +% Relation Algebra % +%%%%%%%%%%%%%%%%%%%% +\NewDocumentCommand\relationComposition{}{\mathbin{\bullet}} +\NewDocumentCommand\relationIdentity{}{\symbf{I}} +\NewDocumentCommand\relationAnd{}{\cap} +\NewDocumentCommand\relationOr{}{\cup} +\NewDocumentCommand\relationZero{}{\symbf{0}} +\NewDocumentCommand\relationOne{}{\symbf{1}} + +%%%%%%%%%%%%%%%%%%%%%%% +% Loss Directionality % +%%%%%%%%%%%%%%%%%%%%%%% +\def\directedfill{\arrowfill@\relbar\relbar\rightarrow} +\def\halfdirectedfill{\arrowfill@\leftharpoonup\relbar\rightharpoondown} +\def\undirectedfill{\arrowfill@\leftarrow\relbar\rightarrow} +\NewDocumentCommand\overDirected{}{\mathpalette{\overarrow@\directedfill}} +\NewDocumentCommand\overHalfdirected{}{\mathpalette{\overarrow@\halfdirectedfill}} +\NewDocumentCommand\overUndirected{}{\mathpalette{\overarrow@\undirectedfill}} + +%%%%%%%%%%%%%%%%% +% Abbreviations % +%%%%%%%%%%%%%%%%% +\NewDocumentCommand\trexspo{}{\textsc{t-re}x \textsc{spo}} +\NewDocumentCommand\trexds{}{\textsc{t-re}x \textsc{ds}} +\NewDocumentCommand\nytfb{}{\(\textsc{nyt}+\textsc{fb}\)} + +%%%%%%%%%%%%%%%%% +% Miscellaneous % +%%%%%%%%%%%%%%%%% +\NewDocumentCommand\empP{}{\ensuremath{\hat{P}}} +\NewDocumentCommand\loss{O{\textsc} m}{\ensuremath{\symcal{L}_{#1{#2}}}} +\NewDocumentCommand\problem{m}{\ensuremath{\symscr{P}\;#1}} +\NewDocumentCommand\bertArch{m}{\textsc{bert-}\discretionary{}{}{}\texttt{#1}} 
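+% (e.g. \bertArch{base} should come out as small-caps ``bert-'' followed by
+% typewriter ``base'', with a discretionary break allowed between the two)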
+\NewDocumentCommand\bertcoder{}{\ensuremath{\operatorname{\textsc{bert}coder}}} +\NewDocumentCommand\fone{}{\ensuremath{F_1}} +\NewDocumentCommand\bcubed{}{\ensuremath{\symup{B}^3}} +\NewDocumentCommand\blanktag{}{\texttt{<}\textsc{blank}\texttt{/>}} +\NewDocumentCommand\ctxoneadj{}{\(\operatorname{\textsc{ctx}}(\textsc{1-adjacency})\)} +\NewDocumentCommand\mmsrlencoder{o}{\ensuremath{\operatorname{enc\IfValueT{#1}{_\text{#1}}}}} +\NewDocumentCommand\mmsrldecoder{o}{\ensuremath{\operatorname{dec\IfValueT{#1}{_\text{#1}}}}} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Hypothesis Management %%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\NewDocumentCommand\hypothesis{m}{\ensuremath{\symscr{H}_\textsc{#1}}} + +\NewDocumentCommand\typeassumption{o m}{% + \begin{trivlist}\item\relax% + \ifcsname hyp:#2\endcsname% + \expandafter\csname hyp:#2\endcsname% + \IfValueT{#1}{% + \rlap{\kern\marginparsep\marginsize\textsc{#1}}% + }% + \else% + \thesis@warning{Trying to type undefined assumption #2.}% + \fi% + \end{trivlist}% +} + +%Global lrbox +\let\glrbox\lrbox +\let\endglrbox\endlrbox +\patchcmd{\glrbox} + {\setbox} + {\global\setbox} + {} + {\thesis@patch@error{Package}{Patch failed, can't make a global lrbox.}} + +\NewDocumentCommand\thesis@define@assumption{m m +m}{ + \expandafter\gdef\csname hyp:#1\endcsname{% + \begin{minipage}[t]{\linewidth}% + \textbf{Assumption} \hypothesis{#2}\textbf{: }% + \em\ignorespaces% + #3% + \end{minipage}% + }% +} + +\NewDocumentEnvironment{assumption}{O{#2} m +b} + {% + \protected@write\@auxout{}{% + \string\thesis@define@assumption{#1}{#2}{\unexpanded{#3}}% + \string\expandafter\xdef\string\csname\space sec:assumption:#1\string\endcsname{\thesubsection}% + }% + \thesis@define@assumption{#1}{#2}{#3}% + \phantomsection% + \label{assumption:#1}% + \expandafter\xdef\csname sec:assumption:#1\endcsname{\thesubsection}% + \typeassumption{#1}% + } + +\NewDocumentEnvironment{refAssumptionSection}{m}{% + \hyperref[assumption:#1]{% + \ifcsname sec:assumption:#1\endcsname% + \csname sec:assumption:#1\endcsname% + \else% + \textbf{??}% + \thesis@warning{Reference to undefined assumption #1.} + \fi% + }% +} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Lua libraries interface %%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\directlua{render = require("lib/render")} +\NewDocumentCommand\renderEmbeddings{m}{\directlua{render.embeddings([[#1]])}} +\NewDocumentCommand\renderConfusions{m m m m m m m m}{\directlua{render.confusions("\luatexluaescapestring{\unexpanded{#1}}", "\luatexluaescapestring{\unexpanded{#2}}", "\luatexluaescapestring{\unexpanded{#3}}", "\luatexluaescapestring{\unexpanded{#4}}", "\luatexluaescapestring{\unexpanded{#5}}", "\luatexluaescapestring{\unexpanded{#6}}", "\luatexluaescapestring{\unexpanded{#7}}", "\luatexluaescapestring{\unexpanded{#8}}")}} +\NewDocumentCommand\renderDegrees{m}{\directlua{render.degrees([[#1]])}} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% TikZ setup %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\input{lib/memory network.def} +\input{lib/plate diagram.def} +\input{lib/distribution output.def} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Font setup %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\newfontfamily\greekFont{GFS Didot Classic}[Ligatures=TeX] +\languageattribute{greek}{ancient} +\addto\extrasgreek{\greekFont} + 
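+% Presumed usage (sketch): short ancient Greek quotations through babel, e.g.
+% \foreignlanguage{greek}{γνῶθι σεαυτόν}, which picks up \greekFont via \extrasgreek.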
+\newfontfamily\traditionalChineseFont{I.Ming}[Ligatures=TeX,LetterSpace=15,Vertical=Alternates] +\NewDocumentCommand\traditionalChinese{m}{\traditionalChineseFont\fontsize{9pt}{11pt}\selectfont #1} diff --git a/thesis.tex b/thesis.tex @@ -0,0 +1,51 @@ +\documentclass[digital]{thesis} + +\title{Deep Learning for Unsupervised Relation Extraction} +\author{Étienne Simon} +\keywords{Machine Learning\sep Deep Learning\sep Natural Language Processing\sep Information Extraction\sep Relation Extraction} + +\usepackage{thesis} +\addbibresource{thesis.bib} + +\begin{document} + %%%%%%%%%%%%%%%% + % Front Matter % + %%%%%%%%%%%%%%%% + \frontmatter + \withoutmarginpar + \include{frontmatter/title} + \include{frontmatter/abstract} + %\include{frontmatter/acknowledgements} + \tableofcontents + \listoffigures + \listoftables + \listofalgorithms + \include{frontmatter/abbreviations} + \include{frontmatter/notation} + \withmarginpar + \include{frontmatter/introduction} + + %%%%%%%%%%%%%%% + % Main Matter % + %%%%%%%%%%%%%%% + \mainmatter + \include{mainmatter/context/chapter} + \include{mainmatter/relation extraction/chapter} + \include{mainmatter/fitb/chapter} + \include{mainmatter/graph/chapter} + + %%%%%%%%%%%%%%% + % Back Matter % + %%%%%%%%%%%%%%% + \backmatter + \include{backmatter/conclusion} + \mainmatter + \appendix + \include{backmatter/french/appendix} + \include{backmatter/assumptions/appendix} + \include{backmatter/datasets/appendix} + \backmatter + \withoutmarginpar + \printbibliography[heading=bibintoc] + \include{backmatter/colophon} +\end{document}
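
For reference, a hedged sketch of how the class options declared in thesis.cls combine: the preamble below mirrors thesis.tex but asks for the print layout with the debugging aids enabled. The option names come from the class itself; the combination shown here is only an illustration, not a file from the repository.

\documentclass[print, thesisdebug=1, lineno]{thesis}
% `print' and `digital' are mutually exclusive; `thesisdebug=1' marks overfull
% boxes and stamps the draft version on each page; `lineno' numbers the lines
% of every page in the margin.
\title{Deep Learning for Unsupervised Relation Extraction}
\author{Étienne Simon}
\keywords{Machine Learning\sep Deep Learning\sep Natural Language Processing\sep Information Extraction\sep Relation Extraction}
\usepackage{thesis}
\addbibresource{thesis.bib}
\begin{document}
  % front, main and back matter as in thesis.tex above
\end{document}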