|
| 1 | +<?xml version="1.0" encoding="UTF-8"?> |
| 2 | +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" |
| 3 | + xml:lang="en" |
| 4 | + xml:id="ParlaMint-AT-en.ana" |
| 5 | + corresp="../ParlaMint-AT.TEI.ana/ParlaMint-AT.ana.xml"> |
| 6 | + <teiHeader> |
| 7 | + <fileDesc> |
| 8 | + <titleStmt> |
| 9 | + <title xml:lang="de" type="main">Österreichisches Parlamentskorpus ParlaMint-AT-en [ParlaMint-en.ana SAMPLE]</title> |
| 10 | + <title xml:lang="en" type="main">Austrian parliamentary corpus ParlaMint-AT-en [ParlaMint-en.ana SAMPLE]</title> |
| 11 | + <title xml:lang="de" type="sub">Stenographische Protokolle der Plenarsitzungen des Österreichischen Nationalrats, XX. Gesetzgebungsperiode - XXVII. Gesetzgebungsperiode (1996 - 2022)</title> |
| 12 | + <title xml:lang="en" type="sub">Shorthand records of the plenary sittings of the National Council of the Austrian parliament, terms 20 - 27 (1996 - 2022)</title> |
| 13 | + <meeting n="27" corresp="#NR" ana="#parla.lower #parla.term #NR.XXVII"/> |
| 14 | + <meeting n="26" corresp="#NR" ana="#parla.lower #parla.term #NR.XXVI"/> |
| 15 | + <meeting n="25" corresp="#NR" ana="#parla.lower #parla.term #NR.XXV"/> |
| 16 | + <meeting n="24" corresp="#NR" ana="#parla.lower #parla.term #NR.XXIV"/> |
| 17 | + <meeting n="23" corresp="#NR" ana="#parla.lower #parla.term #NR.XXIII"/> |
| 18 | + <meeting n="22" corresp="#NR" ana="#parla.lower #parla.term #NR.XXII"/> |
| 19 | + <meeting n="21" corresp="#NR" ana="#parla.lower #parla.term #NR.XXI"/> |
| 20 | + <meeting n="20" corresp="#NR" ana="#parla.lower #parla.term #NR.XX"/> |
| 21 | + <respStmt> |
| 22 | + <persName ref="https://orcid.org/0000-0002-8111-5584">Hannes Pirker</persName> |
| 23 | + <persName ref="https://orcid.org/0000-0003-2436-0361">Daniel Schopper</persName> |
| 24 | + <persName ref="https://orcid.org/0000-0002-1631-4560">Tanja Wissik</persName> |
| 25 | + <resp xml:lang="de">Projektplanung und Methode</resp> |
| 26 | + <resp xml:lang="en">Project set-up and methodology</resp> |
| 27 | + </respStmt> |
| 28 | + <respStmt> |
| 29 | + <persName>Hannes Pirker</persName> |
| 30 | + <resp xml:lang="de">Datenbeschaffung, Korpuskodierung in TEI und automatische linguistische Annotation</resp> |
| 31 | + <resp xml:lang="en">Data retrieval, TEI corpus encoding and automatic linguistic annotation</resp> |
| 32 | + </respStmt> |
| 33 | + <respStmt> |
| 34 | + <persName>Daniel Schopper</persName> |
| 35 | + <resp xml:lang="de">XSLT Transformationen</resp> |
| 36 | + <resp xml:lang="en">XSLT transformations</resp> |
| 37 | + </respStmt> |
| 38 | + <respStmt> |
| 39 | + <persName>Martin Kirnbauer</persName> |
| 40 | + <resp xml:lang="de">Einige der manuellen Korrekturen</resp> |
| 41 | + <resp xml:lang="en">Some of the manual curation</resp> |
| 42 | + </respStmt> |
| 43 | + <respStmt> |
| 44 | + <persName>Tanja Wissik</persName> |
| 45 | + <resp xml:lang="de">Metadaten und Übersetzung</resp> |
| 46 | + <resp xml:lang="en">Metadata and translation</resp> |
| 47 | + </respStmt> |
| 48 | + <respStmt> |
| 49 | + <persName>Taja Kuzman</persName> |
| 50 | + <persName>Nikola Ljubešić</persName> |
| 51 | + <resp xml:lang="en">Machine translation to English and linguistic analysis of the translation</resp> |
| 52 | + </respStmt> |
| 53 | + <funder> |
| 54 | + <orgName xml:lang="de">CLARIN-ERIC</orgName> |
| 55 | + <orgName xml:lang="en">CLARIN-ERIC (Common Language Resources and Technology Infrastructure—European Research Infrastructure Consortium)</orgName> |
| 56 | + <ref target="https://www.clarin.eu/">www.clarin.eu</ref> |
| 57 | + </funder> |
| 58 | + <funder> |
| 59 | + <orgName xml:lang="de">ÖAW (Österreichische Akademie der Wissenschaften)</orgName> |
| 60 | + <orgName xml:lang="en">ÖAW (Austrian Academy of Sciences)</orgName> |
| 61 | + <ref target="https://www.oeaw.ac.at/">www.oeaw.ac.at</ref> |
| 62 | + </funder> |
| 63 | + </titleStmt> |
| 64 | + <editionStmt> |
| 65 | + <edition>3.0</edition> |
| 66 | + </editionStmt> |
| 67 | + <extent><!--These numbers do not reflect the size of the sample!--> |
| 68 | + <measure unit="speeches" quantity="227991" xml:lang="en">227,991 speeches</measure> |
| 69 | + <measure unit="words" quantity="63932213" xml:lang="en">63,932,213 words</measure> |
| 70 | + </extent> |
| 71 | + <publicationStmt> |
| 72 | + <publisher> |
| 73 | + <orgName xml:lang="de">Die CLARIN Forschungsinfrastruktur</orgName> |
| 74 | + <orgName xml:lang="en">The CLARIN research infrastructure</orgName> |
| 75 | + <ref target="https://www.clarin.eu/">www.clarin.eu</ref> |
| 76 | + </publisher> |
| 77 | + <idno type="URI" subtype="handle">http://hdl.handle.net/11356/1810</idno> |
| 78 | + <availability status="free"> |
| 79 | + <licence>http://creativecommons.org/licenses/by/4.0/</licence> |
| 80 | + <p xml:lang="de">Dieses Werk ist lizenziert unter der <ref target="http://creativecommons.org/licenses/by/4.0/">Creative Commons Namensnennung 4.0 International Lizenz (CC BY 4.0)</ref>.</p> |
| 81 | + <p xml:lang="en">This work is licensed under the <ref target="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ref>.</p> |
| 82 | + </availability> |
| 83 | + <date when="2023-06-24">2023-06-24</date> |
| 84 | + </publicationStmt> |
| 85 | + <sourceDesc> |
| 86 | + <bibl> |
| 87 | + <title type="main" xml:lang="de">Stenographische Protokolle der Plenarsitzungen des Nationalrats der Republik Österreich</title> |
| 88 | + <title type="main" xml:lang="en">Shorthand records of the plenary sittings of the National Council of the Austrian parliament</title> |
| 89 | + <publisher>Parlamentsdirektion</publisher> |
| 90 | + <idno type="URI" subtype="parliament">https://www.parlament.gv.at/PAKT/STPROT</idno> |
| 91 | + <date from="1996-01-15" to="2022-05-19">15.01.1996 - 19.05.2022</date> |
| 92 | + </bibl> |
| 93 | + </sourceDesc> |
| 94 | + </fileDesc> |
| 95 | + <encodingDesc> |
| 96 | + <projectDesc> |
| 97 | + <p xml:lang="en"> |
| 98 | + <ref target="https://www.clarin.eu/content/parlamint">ParlaMint</ref> is a project that aims to (1) create a multilingual set of comparable corpora of parliamentary proceedings uniformly encoded according to the <ref target="https://clarin-eric.github.io/ParlaMint/">ParlaMint encoding guidelines</ref>, covering the period from 2015 to mid-2022; (2) add linguistic annotations to the corpora and machine-translate them to English; (3) make the corpora available through concordancers; and (4) build use cases in Political Sciences and Digital Humanities based on the corpus data.</p> |
| 99 | + <p xml:lang="de"> |
| 100 | + <ref target="https://www.clarin.eu/content/parlamint">ParlaMint</ref> |
| 101 | + </p> |
| 102 | + </projectDesc> |
| 103 | + <editorialDecl> |
| 104 | + <correction> |
| 105 | + <p>No correction of source texts was performed.</p> |
| 106 | + </correction> |
| 107 | + <normalization> |
| 108 | + <p>Text has not been normalised, except for spacing. Printed matter quoted in the protocols was removed.</p> |
| 109 | + </normalization> |
| 110 | + <hyphenation> |
| 111 | + <p>No end-of-line hyphens were present in the source.</p> |
| 112 | + </hyphenation> |
| 113 | + <quotation> |
| 114 | + <p>Quotation marks have been left in the text and are not explicitly marked up.</p> |
| 115 | + </quotation> |
| 116 | + <segmentation> |
| 117 | + <p>The texts are segmented into utterances (speeches) and segments (corresponding to paragraphs in the source transcription).</p> |
| 118 | + </segmentation> |
| 119 | + </editorialDecl> |
| 120 | + <tagsDecl><!--These numbers do not reflect the size of the sample!--> |
| 121 | + <namespace name="http://www.tei-c.org/ns/1.0"> |
| 122 | + <tagUsage gi="body" occurs="1197"/> |
| 123 | + <tagUsage gi="desc" occurs="346176"/> |
| 124 | + <tagUsage gi="div" occurs="1197"/> |
| 125 | + <tagUsage gi="gap" occurs="14864"/> |
| 126 | + <tagUsage gi="kinesic" occurs="248593"/> |
| 127 | + <tagUsage gi="name" occurs="2100135"/> |
| 128 | + <tagUsage gi="note" occurs="668625"/> |
| 129 | + <tagUsage gi="pb" occurs="116531"/> |
| 130 | + <tagUsage gi="pc" occurs="9280443"/> |
| 131 | + <tagUsage gi="s" occurs="3919672"/> |
| 132 | + <tagUsage gi="seg" occurs="662401"/> |
| 133 | + <tagUsage gi="text" occurs="1197"/> |
| 134 | + <tagUsage gi="u" occurs="227991"/> |
| 135 | + <tagUsage gi="vocal" occurs="82719"/> |
| 136 | + <tagUsage gi="w" occurs="63932213"/> |
| 137 | + </namespace> |
| 138 | + </tagsDecl> |
| 139 | + <classDecl> |
| 140 | + <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" |
| 141 | + href="ParlaMint-taxonomy-parla.legislature.xml"/> |
| 142 | + <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" |
| 143 | + href="ParlaMint-taxonomy-speaker_types.xml"/> |
| 144 | + <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" |
| 145 | + href="ParlaMint-taxonomy-subcorpus.xml"/> |
| 146 | + <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" |
| 147 | + href="ParlaMint-taxonomy-NER.ana.xml"/> |
| 148 | + </classDecl> |
| 149 | + <appInfo> |
| 150 | + <application ident="EasyNMT" version="2.0"> |
| 151 | + <label>EasyNMT (OPUS-MT model)</label> |
| 152 | + <desc>Translation to English done with EasyNMT (<ref target="https://github.com/UKPLab/EasyNMT">https://github.com/UKPLab/EasyNMT</ref>) with OPUS-MT model gmw (<ref target="https://github.com/Helsinki-NLP/Opus-MT">https://github.com/Helsinki-NLP/Opus-MT</ref>)</desc> |
| 153 | + </application> |
| 154 | + <application ident="Stanza" version="1.5"> |
| 155 | + <label>Stanza</label> |
| 156 | + <desc>Tokenisation, PoS tagging, lemmatization, and NER annotation done with Stanza (<ref target="https://stanfordnlp.github.io/stanza/">https://stanfordnlp.github.io/stanza/</ref>) with the model for English. For NER the conll03 model with 4 NE classes was used.</desc> |
| 157 | + </application> |
| 158 | + </appInfo> |
| 159 | + </encodingDesc> |
| 160 | + <profileDesc> |
| 161 | + <settingDesc> |
| 162 | + <setting> |
| 163 | + <name type="city" xml:lang="de">Wien</name> |
| 164 | + <name type="city" xml:lang="en">Vienna</name> |
| 165 | + <name type="country" xml:lang="de" key="AT">Österreich</name> |
| 166 | + <name type="country" xml:lang="en" key="AT">Austria</name> |
| 167 | + <date from="1996-01-15" to="2022-05-19"/> |
| 168 | + </setting> |
| 169 | + </settingDesc> |
| 170 | + <textClass> |
| 171 | + <catRef scheme="#ParlaMint-taxonomy-parla.legislature" |
| 172 | + target="#parla.bi #parla.lower"/> |
| 173 | + </textClass> |
| 174 | + <particDesc> |
| 175 | + <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-AT-listOrg.xml"/> |
| 176 | + <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" |
| 177 | + href="ParlaMint-AT-listPerson.xml"/> |
| 178 | + </particDesc> |
| 179 | + <langUsage> |
| 180 | + <language ident="de" xml:lang="de">Deutsch</language> |
| 181 | + <language ident="de" xml:lang="en">German</language> |
| 182 | + <language ident="en" xml:lang="de">Englisch</language> |
| 183 | + <language ident="en" xml:lang="en">English</language> |
| 184 | + </langUsage> |
| 185 | + </profileDesc> |
| 186 | + <revisionDesc> |
| 187 | + <change when="2023-06-24"> |
| 188 | + <name>Tomaž Erjavec</name>: Made sample.</change> |
| 189 | + <change when="2023-06-24">parlamint2release script: Fix some identifiable errors for the release.</change> |
| 190 | + <change when="2023-06-23"> |
| 191 | + <name>Tomaž Erjavec</name>: Generate TEI version of MTed corpus.</change> |
| 192 | + <change when="2023-06-24">parlamint-add-common-content script: Adding common content.</change> |
| 193 | + </revisionDesc> |
| 194 | + </teiHeader> |
| 195 | + <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" |
| 196 | + href="ParlaMint-AT-en_2005-03-31-022-XXII-NRSITZ-00100.ana.xml"/> |
| 197 | + <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" |
| 198 | + href="ParlaMint-AT-en_2014-09-24-025-XXV-NRSITZ-00042.ana.xml"/> |
| 199 | + <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" |
| 200 | + href="ParlaMint-AT-en_2022-05-19-027-XXVII-NRSITZ-00159.ana.xml"/> |
| 201 | +</teiCorpus> |
0 commit comments