| @@ -26,6 +26,7 @@ services: | |||
| - '8983:8983' | |||
| volumes: | |||
| - solrdata:/var/solr | |||
| - ./solr_config:/opt/solr/server/solr/configsets/custom | |||
| volumes: | |||
| solrdata: | |||
| @@ -44,6 +44,14 @@ include '../ops_api.php' | |||
| <br><br> | |||
| Year: | |||
| <?php | |||
| echo $result['year']; | |||
| ?> | |||
| <br><br> | |||
| EPO publication: | |||
| <a href=<?php echo $result['epo_publication_url']; ?>> | |||
| @@ -44,6 +44,11 @@ $cores = array("active", "expanding", "invisible", "multispecies", "surviving"); | |||
| <option value="multispecies">multi-species</option> | |||
| <option value="surviving">surviving</option> | |||
| </select> | |||
| sort by: | |||
| <select name="sort" id="sort"> | |||
| <option value="relevance">relevance</option> | |||
| <option value="year">year</option> | |||
| </select> | |||
| <input type="submit" id="submit" value="search"> | |||
| </form> | |||
| </div> | |||
| @@ -26,7 +26,7 @@ include '../solr.php'; | |||
| <?php | |||
| $search_results = solr_search($_POST["search"], $_POST["searchopt"]); | |||
| $search_results = solr_search($_POST["search"], $_POST["searchopt"], $_POST["sort"]); | |||
| if(is_array($search_results)): | |||
| @@ -43,6 +43,14 @@ include '../solr.php'; | |||
| <br><br> | |||
| Year: | |||
| <?php | |||
| echo $result['year']; | |||
| ?> | |||
| <br><br> | |||
| EPO publication: | |||
| <a href=<?php echo $result['epo_publication_url']; ?>> | |||
| @@ -72,7 +80,7 @@ include '../solr.php'; | |||
| <br><br> | |||
| <?php | |||
| if ($result['abstract']): | |||
| if (isset($result['abstract'])): | |||
| ?> | |||
| Abstract: | |||
| @@ -1,9 +1,15 @@ | |||
| <?php | |||
| function solr_search($search, $core){ | |||
| function solr_search($search, $core, $sort){ | |||
| // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html | |||
| $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json'; | |||
| if ($sort == 'relevance'){ | |||
| // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html | |||
| $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json'; | |||
| } | |||
| else{ | |||
| // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html | |||
| $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json&sort=' . $sort . '%20asc'; | |||
| } | |||
| // Perform Curl request on the Solr API | |||
| $ch = curl_init(); | |||
| @@ -100,6 +106,11 @@ function parse_result($id, $input){ | |||
| elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract)) { | |||
| $output['abstract'] = $abstract[1]; | |||
| } | |||
| // Search for the year in the content element and display it | |||
| if (preg_match('/=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})/', $input, $year)){ | |||
| $output['year'] = $year[1]; | |||
| } | |||
| return $output; | |||
| } | |||
| @@ -0,0 +1,67 @@ | |||
| <?xml version="1.0" ?> | |||
| <!-- | |||
| Licensed to the Apache Software Foundation (ASF) under one or more | |||
| contributor license agreements. See the NOTICE file distributed with | |||
| this work for additional information regarding copyright ownership. | |||
| The ASF licenses this file to You under the Apache License, Version 2.0 | |||
| (the "License"); you may not use this file except in compliance with | |||
| the License. You may obtain a copy of the License at | |||
| http://www.apache.org/licenses/LICENSE-2.0 | |||
| Unless required by applicable law or agreed to in writing, software | |||
| distributed under the License is distributed on an "AS IS" BASIS, | |||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| See the License for the specific language governing permissions and | |||
| limitations under the License. | |||
| --> | |||
| <!-- Example exchange rates file for CurrencyField type named "currency" in example schema --> | |||
| <currencyConfig version="1.0"> | |||
| <rates> | |||
| <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 --> | |||
| <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" /> | |||
| <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" /> | |||
| <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" /> | |||
| <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" /> | |||
| <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" /> | |||
| <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" /> | |||
| <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" /> | |||
| <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" /> | |||
| <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" /> | |||
| <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" /> | |||
| <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" /> | |||
| <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" /> | |||
| <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" /> | |||
| <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" /> | |||
| <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" /> | |||
| <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" /> | |||
| <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" /> | |||
| <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" /> | |||
| <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" /> | |||
| <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" /> | |||
| <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" /> | |||
| <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" /> | |||
| <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" /> | |||
| <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" /> | |||
| <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" /> | |||
| <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" /> | |||
| <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" /> | |||
| <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" /> | |||
| <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" /> | |||
| <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" /> | |||
| <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" /> | |||
| <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" /> | |||
| <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" /> | |||
| <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" /> | |||
| <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" /> | |||
| <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" /> | |||
| <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" /> | |||
| <!-- Cross-rates for some common currencies --> | |||
| <rate from="EUR" to="GBP" rate="0.869914" /> | |||
| <rate from="EUR" to="NOK" rate="7.800095" /> | |||
| <rate from="GBP" to="NOK" rate="8.966508" /> | |||
| </rates> | |||
| </currencyConfig> | |||
| @@ -0,0 +1,42 @@ | |||
| <?xml version="1.0" encoding="UTF-8" ?> | |||
| <!-- | |||
| Licensed to the Apache Software Foundation (ASF) under one or more | |||
| contributor license agreements. See the NOTICE file distributed with | |||
| this work for additional information regarding copyright ownership. | |||
| The ASF licenses this file to You under the Apache License, Version 2.0 | |||
| (the "License"); you may not use this file except in compliance with | |||
| the License. You may obtain a copy of the License at | |||
| http://www.apache.org/licenses/LICENSE-2.0 | |||
| Unless required by applicable law or agreed to in writing, software | |||
| distributed under the License is distributed on an "AS IS" BASIS, | |||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| See the License for the specific language governing permissions and | |||
| limitations under the License. | |||
| --> | |||
| <!-- If this file is found in the config directory, it will only be | |||
| loaded once at startup. If it is found in Solr's data | |||
| directory, it will be re-loaded every commit. | |||
| See http://wiki.apache.org/solr/QueryElevationComponent for more info | |||
| --> | |||
| <elevate> | |||
| <!-- Query elevation examples | |||
| <query text="foo bar"> | |||
| <doc id="1" /> | |||
| <doc id="2" /> | |||
| <doc id="3" /> | |||
| </query> | |||
| for use with techproducts example | |||
| <query text="ipod"> | |||
| <doc id="MA147LL/A" /> put the actual ipod at the top | |||
| <doc id="IW-02" exclude="true" /> exclude this cable | |||
| </query> | |||
| --> | |||
| </elevate> | |||
| @@ -0,0 +1,2 @@ | |||
| <URL> | |||
| <EMAIL> | |||
| @@ -0,0 +1,8 @@ | |||
| # Set of Catalan contractions for ElisionFilter | |||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | |||
| d | |||
| l | |||
| m | |||
| n | |||
| s | |||
| t | |||
| @@ -0,0 +1,15 @@ | |||
| # Set of French contractions for ElisionFilter | |||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | |||
| l | |||
| m | |||
| t | |||
| qu | |||
| n | |||
| s | |||
| j | |||
| d | |||
| c | |||
| jusqu | |||
| quoiqu | |||
| lorsqu | |||
| puisqu | |||
| @@ -0,0 +1,5 @@ | |||
| # Set of Irish contractions for ElisionFilter | |||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | |||
| d | |||
| m | |||
| b | |||
| @@ -0,0 +1,23 @@ | |||
| # Set of Italian contractions for ElisionFilter | |||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | |||
| c | |||
| l | |||
| all | |||
| dall | |||
| dell | |||
| nell | |||
| sull | |||
| coll | |||
| pell | |||
| gl | |||
| agl | |||
| dagl | |||
| degl | |||
| negl | |||
| sugl | |||
| un | |||
| m | |||
| t | |||
| s | |||
| v | |||
| d | |||
| @@ -0,0 +1,5 @@ | |||
| # Set of Irish hyphenations for StopFilter | |||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | |||
| h | |||
| n | |||
| t | |||
| @@ -0,0 +1,6 @@ | |||
| # Set of overrides for the dutch stemmer | |||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | |||
| fiets fiets | |||
| bromfiets bromfiets | |||
| ei eier | |||
| kind kinder | |||
| @@ -0,0 +1,420 @@ | |||
| # | |||
| # This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. | |||
| # | |||
| # Any token with a part-of-speech tag that exactly matches one of those defined in | |||
| # this file is removed from the token stream. | |||
| # | |||
| # Set your own stoptags by uncommenting the lines below. Note that comments are | |||
| # not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, | |||
| # etc. that can be useful for building your own stoptag set. | |||
| # | |||
| # The entire possible tagset is provided below for convenience. | |||
| # | |||
| ##### | |||
| # noun: unclassified nouns | |||
| #名詞 | |||
| # | |||
| # noun-common: Common nouns or nouns where the sub-classification is undefined | |||
| #名詞-一般 | |||
| # | |||
| # noun-proper: Proper nouns where the sub-classification is undefined | |||
| #名詞-固有名詞 | |||
| # | |||
| # noun-proper-misc: miscellaneous proper nouns | |||
| #名詞-固有名詞-一般 | |||
| # | |||
| # noun-proper-person: Personal names where the sub-classification is undefined | |||
| #名詞-固有名詞-人名 | |||
| # | |||
| # noun-proper-person-misc: names that cannot be divided into surname and | |||
| # given name; foreign names; names where the surname or given name is unknown. | |||
| # e.g. お市の方 | |||
| #名詞-固有名詞-人名-一般 | |||
| # | |||
| # noun-proper-person-surname: Mainly Japanese surnames. | |||
| # e.g. 山田 | |||
| #名詞-固有名詞-人名-姓 | |||
| # | |||
| # noun-proper-person-given_name: Mainly Japanese given names. | |||
| # e.g. 太郎 | |||
| #名詞-固有名詞-人名-名 | |||
| # | |||
| # noun-proper-organization: Names representing organizations. | |||
| # e.g. 通産省, NHK | |||
| #名詞-固有名詞-組織 | |||
| # | |||
| # noun-proper-place: Place names where the sub-classification is undefined | |||
| #名詞-固有名詞-地域 | |||
| # | |||
| # noun-proper-place-misc: Place names excluding countries. | |||
| # e.g. アジア, バルセロナ, 京都 | |||
| #名詞-固有名詞-地域-一般 | |||
| # | |||
| # noun-proper-place-country: Country names. | |||
| # e.g. 日本, オーストラリア | |||
| #名詞-固有名詞-地域-国 | |||
| # | |||
| # noun-pronoun: Pronouns where the sub-classification is undefined | |||
| #名詞-代名詞 | |||
| # | |||
| # noun-pronoun-misc: miscellaneous pronouns: | |||
| # e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ | |||
| #名詞-代名詞-一般 | |||
| # | |||
| # noun-pronoun-contraction: Spoken language contraction made by combining a | |||
| # pronoun and the particle 'wa'. | |||
| # e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ | |||
| #名詞-代名詞-縮約 | |||
| # | |||
| # noun-adverbial: Temporal nouns such as names of days or months that behave | |||
| # like adverbs. Nouns that represent amount or ratios and can be used adverbially, | |||
| # e.g. 金曜, 一月, 午後, 少量 | |||
| #名詞-副詞可能 | |||
| # | |||
| # noun-verbal: Nouns that take arguments with case and can appear followed by | |||
| # 'suru' and related verbs (する, できる, なさる, くださる) | |||
| # e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り | |||
| #名詞-サ変接続 | |||
| # | |||
| # noun-adjective-base: The base form of adjectives, words that appear before な ("na") | |||
| # e.g. 健康, 安易, 駄目, だめ | |||
| #名詞-形容動詞語幹 | |||
| # | |||
| # noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. | |||
| # e.g. 0, 1, 2, 何, 数, 幾 | |||
| #名詞-数 | |||
| # | |||
| # noun-affix: noun affixes where the sub-classification is undefined | |||
| #名詞-非自立 | |||
| # | |||
| # noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that | |||
| # attach to the base form of inflectional words, words that cannot be classified | |||
| # into any of the other categories below. This category includes indefinite nouns. | |||
| # e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, | |||
| # 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, | |||
| # 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, | |||
| # わり, 割り, 割, ん-口語/, もん-口語/ | |||
| #名詞-非自立-一般 | |||
| # | |||
| # noun-affix-adverbial: noun affixes that can behave as adverbs. | |||
| # e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, | |||
| # 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, | |||
| # 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, | |||
| # とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, | |||
| # 儘, 侭, みぎり, 矢先 | |||
| #名詞-非自立-副詞可能 | |||
| # | |||
| # noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars | |||
| # with the stem よう(だ) ("you(da)"). | |||
| # e.g. よう, やう, 様 (よう) | |||
| #名詞-非自立-助動詞語幹 | |||
| # | |||
| # noun-affix-adjective-base: noun affixes that can connect to the indeclinable | |||
| # connection form な (aux "da"). | |||
| # e.g. みたい, ふう | |||
| #名詞-非自立-形容動詞語幹 | |||
| # | |||
| # noun-special: special nouns where the sub-classification is undefined. | |||
| #名詞-特殊 | |||
| # | |||
| # noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is | |||
| # treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the base | |||
| # form of inflectional words. | |||
| # e.g. そう | |||
| #名詞-特殊-助動詞語幹 | |||
| # | |||
| # noun-suffix: noun suffixes where the sub-classification is undefined. | |||
| #名詞-接尾 | |||
| # | |||
| # noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect | |||
| # to ガル or タイ and can combine into compound nouns, words that cannot be classified into | |||
| # any of the other categories below. In general, this category is more inclusive than | |||
| # 接尾語 ("suffix") and is usually the last element in a compound noun. | |||
| # e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, | |||
| # よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 | |||
| #名詞-接尾-一般 | |||
| # | |||
| # noun-suffix-person: Suffixes that form nouns and attach to person names more often | |||
| # than other nouns. | |||
| # e.g. 君, 様, 著 | |||
| #名詞-接尾-人名 | |||
| # | |||
| # noun-suffix-place: Suffixes that form nouns and attach to place names more often | |||
| # than other nouns. | |||
| # e.g. 町, 市, 県 | |||
| #名詞-接尾-地域 | |||
| # | |||
| # noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that | |||
| # can appear before スル ("suru"). | |||
| # e.g. 化, 視, 分け, 入り, 落ち, 買い | |||
| #名詞-接尾-サ変接続 | |||
| # | |||
| # noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, | |||
| # is treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the | |||
| # conjunctive form of inflectional words. | |||
| # e.g. そう | |||
| #名詞-接尾-助動詞語幹 | |||
| # | |||
| # noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive | |||
| # form of inflectional words and appear before the copula だ ("da"). | |||
| # e.g. 的, げ, がち | |||
| #名詞-接尾-形容動詞語幹 | |||
| # | |||
| # noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. | |||
| # e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) | |||
| #名詞-接尾-副詞可能 | |||
| # | |||
| # noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category | |||
| # is more inclusive than 助数詞 ("classifier") and includes common nouns that attach | |||
| # to numbers. | |||
| # e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 | |||
| #名詞-接尾-助数詞 | |||
| # | |||
| # noun-suffix-special: Special suffixes that mainly attach to inflecting words. | |||
| # e.g. (楽し) さ, (考え) 方 | |||
| #名詞-接尾-特殊 | |||
| # | |||
| # noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words | |||
| # together. | |||
| # e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) | |||
| #名詞-接続詞的 | |||
| # | |||
| # noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are | |||
| # semantically verb-like. | |||
| # e.g. ごらん, ご覧, 御覧, 頂戴 | |||
| #名詞-動詞非自立的 | |||
| # | |||
| # noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, | |||
| # dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") | |||
| # is いわく ("iwaku"). | |||
| #名詞-引用文字列 | |||
| # | |||
| # noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and | |||
| # behave like an adjective. | |||
| # e.g. 申し訳, 仕方, とんでも, 違い | |||
| #名詞-ナイ形容詞語幹 | |||
| # | |||
| ##### | |||
| # prefix: unclassified prefixes | |||
| #接頭詞 | |||
| # | |||
| # prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) | |||
| # excluding numerical expressions. | |||
| # e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) | |||
| #接頭詞-名詞接続 | |||
| # | |||
| # prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb | |||
| # in conjunctive form followed by なる/なさる/くださる. | |||
| # e.g. お (読みなさい), お (座り) | |||
| #接頭詞-動詞接続 | |||
| # | |||
| # prefix-adjectival: Prefixes that attach to adjectives. | |||
| # e.g. お (寒いですねえ), バカ (でかい) | |||
| #接頭詞-形容詞接続 | |||
| # | |||
| # prefix-numerical: Prefixes that attach to numerical expressions. | |||
| # e.g. 約, およそ, 毎時 | |||
| #接頭詞-数接続 | |||
| # | |||
| ##### | |||
| # verb: unclassified verbs | |||
| #動詞 | |||
| # | |||
| # verb-main: | |||
| #動詞-自立 | |||
| # | |||
| # verb-auxiliary: | |||
| #動詞-非自立 | |||
| # | |||
| # verb-suffix: | |||
| #動詞-接尾 | |||
| # | |||
| ##### | |||
| # adjective: unclassified adjectives | |||
| #形容詞 | |||
| # | |||
| # adjective-main: | |||
| #形容詞-自立 | |||
| # | |||
| # adjective-auxiliary: | |||
| #形容詞-非自立 | |||
| # | |||
| # adjective-suffix: | |||
| #形容詞-接尾 | |||
| # | |||
| ##### | |||
| # adverb: unclassified adverbs | |||
| #副詞 | |||
| # | |||
| # adverb-misc: Words that can be segmented into one unit and where adnominal | |||
| # modification is not possible. | |||
| # e.g. あいかわらず, 多分 | |||
| #副詞-一般 | |||
| # | |||
| # adverb-particle_conjunction: Adverbs that can be followed by の, は, に, | |||
| # な, する, だ, etc. | |||
| # e.g. こんなに, そんなに, あんなに, なにか, なんでも | |||
| #副詞-助詞類接続 | |||
| # | |||
| ##### | |||
| # adnominal: Words that only have noun-modifying forms. | |||
| # e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, | |||
| # どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, | |||
| # 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き | |||
| #連体詞 | |||
| # | |||
| ##### | |||
| # conjunction: Conjunctions that can occur independently. | |||
| # e.g. が, けれども, そして, じゃあ, それどころか | |||
| 接続詞 | |||
| # | |||
| ##### | |||
| # particle: unclassified particles. | |||
| 助詞 | |||
| # | |||
| # particle-case: case particles where the subclassification is undefined. | |||
| 助詞-格助詞 | |||
| # | |||
| # particle-case-misc: Case particles. | |||
| # e.g. から, が, で, と, に, へ, より, を, の, にて | |||
| 助詞-格助詞-一般 | |||
| # | |||
| # particle-case-quote: the "to" that appears after nouns, a person’s speech, | |||
| # quotation marks, expressions of decisions from a meeting, reasons, judgements, | |||
| # conjectures, etc. | |||
| # e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) | |||
| 助詞-格助詞-引用 | |||
| # | |||
| # particle-case-compound: Compounds of particles and verbs that mainly behave | |||
| # like case particles. | |||
| # e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, | |||
| # にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, | |||
| # にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, | |||
| # に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, | |||
| # に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, | |||
| # にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, | |||
| # にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, | |||
| # って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ | |||
| 助詞-格助詞-連語 | |||
| # | |||
| # particle-conjunctive: | |||
| # e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, | |||
| # ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, | |||
| # (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ | |||
| 助詞-接続助詞 | |||
| # | |||
| # particle-dependency: | |||
| # e.g. こそ, さえ, しか, すら, は, も, ぞ | |||
| 助詞-係助詞 | |||
| # | |||
| # particle-adverbial: | |||
| # e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, | |||
| # (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, | |||
| # (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, | |||
| # (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, | |||
| # ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) | |||
| 助詞-副助詞 | |||
| # | |||
| # particle-interjective: particles with interjective grammatical roles. | |||
| # e.g. (松島) や | |||
| 助詞-間投助詞 | |||
| # | |||
| # particle-coordinate: | |||
| # e.g. と, たり, だの, だり, とか, なり, や, やら | |||
| 助詞-並立助詞 | |||
| # | |||
| # particle-final: | |||
| # e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, | |||
| # ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ | |||
| 助詞-終助詞 | |||
| # | |||
| # particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is | |||
| # adverbial, conjunctive, or sentence final. For example: | |||
| # (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 | |||
| # (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 | |||
| # 「(祈りが届いたせい) か (, 試験に合格した.)」 | |||
| # (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 | |||
| # e.g. か | |||
| 助詞-副助詞/並立助詞/終助詞 | |||
| # | |||
| # particle-adnominalizer: The "no" that attaches to nouns and modifies | |||
| # non-inflectional words. | |||
| 助詞-連体化 | |||
| # | |||
| # particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs | |||
| # that are giongo, giseigo, or gitaigo. | |||
| # e.g. に, と | |||
| 助詞-副詞化 | |||
| # | |||
| # particle-special: A particle that does not fit into one of the above classifications. | |||
| # This includes particles that are used in Tanka, Haiku, and other poetry. | |||
| # e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) | |||
| 助詞-特殊 | |||
| # | |||
| ##### | |||
| # auxiliary-verb: | |||
| 助動詞 | |||
| # | |||
| ##### | |||
| # interjection: Greetings and other exclamations. | |||
| # e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, | |||
| # いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい | |||
| #感動詞 | |||
| # | |||
| ##### | |||
| # symbol: unclassified Symbols. | |||
| 記号 | |||
| # | |||
| # symbol-misc: A general symbol not in one of the categories below. | |||
| # e.g. [○◎@$〒→+] | |||
| 記号-一般 | |||
| # | |||
| # symbol-comma: Commas | |||
| # e.g. [,、] | |||
| 記号-読点 | |||
| # | |||
| # symbol-period: Periods and full stops. | |||
| # e.g. [..。] | |||
| 記号-句点 | |||
| # | |||
| # symbol-space: Full-width whitespace. | |||
| 記号-空白 | |||
| # | |||
| # symbol-open_bracket: | |||
| # e.g. [({‘“『【] | |||
| 記号-括弧開 | |||
| # | |||
| # symbol-close_bracket: | |||
| # e.g. [)}’”』」】] | |||
| 記号-括弧閉 | |||
| # | |||
| # symbol-alphabetic: | |||
| #記号-アルファベット | |||
| # | |||
| ##### | |||
| # other: unclassified other | |||
| #その他 | |||
| # | |||
| # other-interjection: Words that are hard to classify as noun-suffixes or | |||
| # sentence-final particles. | |||
| # e.g. (だ)ァ | |||
| その他-間投 | |||
| # | |||
| ##### | |||
| # filler: Aizuchi that occurs during a conversation or sounds inserted as filler. | |||
| # e.g. あの, うんと, えと | |||
| フィラー | |||
| # | |||
| ##### | |||
| # non-verbal: non-verbal sound. | |||
| 非言語音 | |||
| # | |||
| ##### | |||
| # fragment: | |||
| #語断片 | |||
| # | |||
| ##### | |||
| # unknown: unknown part of speech. | |||
| #未知語 | |||
| # | |||
| ##### End of file | |||
| @@ -0,0 +1,125 @@ | |||
| # This file was created by Jacques Savoy and is distributed under the BSD license. | |||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | |||
| # Also see http://www.opensource.org/licenses/bsd-license.html | |||
| # Cleaned on October 11, 2009 (not normalized, so use before normalization) | |||
| # This means that when modifying this list, you might need to add some | |||
| # redundant entries, for example containing forms with both أ and ا | |||
| من | |||
| ومن | |||
| منها | |||
| منه | |||
| في | |||
| وفي | |||
| فيها | |||
| فيه | |||
| و | |||
| ف | |||
| ثم | |||
| او | |||
| أو | |||
| ب | |||
| بها | |||
| به | |||
| ا | |||
| أ | |||
| اى | |||
| اي | |||
| أي | |||
| أى | |||
| لا | |||
| ولا | |||
| الا | |||
| ألا | |||
| إلا | |||
| لكن | |||
| ما | |||
| وما | |||
| كما | |||
| فما | |||
| عن | |||
| مع | |||
| اذا | |||
| إذا | |||
| ان | |||
| أن | |||
| إن | |||
| انها | |||
| أنها | |||
| إنها | |||
| انه | |||
| أنه | |||
| إنه | |||
| بان | |||
| بأن | |||
| فان | |||
| فأن | |||
| وان | |||
| وأن | |||
| وإن | |||
| التى | |||
| التي | |||
| الذى | |||
| الذي | |||
| الذين | |||
| الى | |||
| الي | |||
| إلى | |||
| إلي | |||
| على | |||
| عليها | |||
| عليه | |||
| اما | |||
| أما | |||
| إما | |||
| ايضا | |||
| أيضا | |||
| كل | |||
| وكل | |||
| لم | |||
| ولم | |||
| لن | |||
| ولن | |||
| هى | |||
| هي | |||
| هو | |||
| وهى | |||
| وهي | |||
| وهو | |||
| فهى | |||
| فهي | |||
| فهو | |||
| انت | |||
| أنت | |||
| لك | |||
| لها | |||
| له | |||
| هذه | |||
| هذا | |||
| تلك | |||
| ذلك | |||
| هناك | |||
| كانت | |||
| كان | |||
| يكون | |||
| تكون | |||
| وكانت | |||
| وكان | |||
| غير | |||
| بعض | |||
| قد | |||
| نحو | |||
| بين | |||
| بينما | |||
| منذ | |||
| ضمن | |||
| حيث | |||
| الان | |||
| الآن | |||
| خلال | |||
| بعد | |||
| قبل | |||
| حتى | |||
| عند | |||
| عندما | |||
| لدى | |||
| جميع | |||
| @@ -0,0 +1,193 @@ | |||
| # This file was created by Jacques Savoy and is distributed under the BSD license. | |||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | |||
| # Also see http://www.opensource.org/licenses/bsd-license.html | |||
| а | |||
| аз | |||
| ако | |||
| ала | |||
| бе | |||
| без | |||
| беше | |||
| би | |||
| бил | |||
| била | |||
| били | |||
| било | |||
| близо | |||
| бъдат | |||
| бъде | |||
| бяха | |||
| в | |||
| вас | |||
| ваш | |||
| ваша | |||
| вероятно | |||
| вече | |||
| взема | |||
| ви | |||
| вие | |||
| винаги | |||
| все | |||
| всеки | |||
| всички | |||
| всичко | |||
| всяка | |||
| във | |||
| въпреки | |||
| върху | |||
| г | |||
| ги | |||
| главно | |||
| го | |||
| д | |||
| да | |||
| дали | |||
| до | |||
| докато | |||
| докога | |||
| дори | |||
| досега | |||
| доста | |||
| е | |||
| едва | |||
| един | |||
| ето | |||
| за | |||
| зад | |||
| заедно | |||
| заради | |||
| засега | |||
| затова | |||
| защо | |||
| защото | |||
| и | |||
| из | |||
| или | |||
| им | |||
| има | |||
| имат | |||
| иска | |||
| й | |||
| каза | |||
| как | |||
| каква | |||
| какво | |||
| както | |||
| какъв | |||
| като | |||
| кога | |||
| когато | |||
| което | |||
| които | |||
| кой | |||
| който | |||
| колко | |||
| която | |||
| къде | |||
| където | |||
| към | |||
| ли | |||
| м | |||
| ме | |||
| между | |||
| мен | |||
| ми | |||
| мнозина | |||
| мога | |||
| могат | |||
| може | |||
| моля | |||
| момента | |||
| му | |||
| н | |||
| на | |||
| над | |||
| назад | |||
| най | |||
| направи | |||
| напред | |||
| например | |||
| нас | |||
| не | |||
| него | |||
| нея | |||
| ни | |||
| ние | |||
| никой | |||
| нито | |||
| но | |||
| някои | |||
| някой | |||
| няма | |||
| обаче | |||
| около | |||
| освен | |||
| особено | |||
| от | |||
| отгоре | |||
| отново | |||
| още | |||
| пак | |||
| по | |||
| повече | |||
| повечето | |||
| под | |||
| поне | |||
| поради | |||
| после | |||
| почти | |||
| прави | |||
| пред | |||
| преди | |||
| през | |||
| при | |||
| пък | |||
| първо | |||
| с | |||
| са | |||
| само | |||
| се | |||
| сега | |||
| си | |||
| скоро | |||
| след | |||
| сме | |||
| според | |||
| сред | |||
| срещу | |||
| сте | |||
| съм | |||
| със | |||
| също | |||
| т | |||
| тази | |||
| така | |||
| такива | |||
| такъв | |||
| там | |||
| твой | |||
| те | |||
| тези | |||
| ти | |||
| тн | |||
| то | |||
| това | |||
| тогава | |||
| този | |||
| той | |||
| толкова | |||
| точно | |||
| трябва | |||
| тук | |||
| тъй | |||
| тя | |||
| тях | |||
| у | |||
| харесва | |||
| ч | |||
| че | |||
| често | |||
| чрез | |||
| ще | |||
| щом | |||
| я | |||
| @@ -0,0 +1,220 @@ | |||
| # Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) | |||
| a | |||
| abans | |||
| ací | |||
| ah | |||
| així | |||
| això | |||
| al | |||
| als | |||
| aleshores | |||
| algun | |||
| alguna | |||
| algunes | |||
| alguns | |||
| alhora | |||
| allà | |||
| allí | |||
| allò | |||
| altra | |||
| altre | |||
| altres | |||
| amb | |||
| ambdós | |||
| ambdues | |||
| apa | |||
| aquell | |||
| aquella | |||
| aquelles | |||
| aquells | |||
| aquest | |||
| aquesta | |||
| aquestes | |||
| aquests | |||
| aquí | |||
| baix | |||
| cada | |||
| cadascú | |||
| cadascuna | |||
| cadascunes | |||
| cadascuns | |||
| com | |||
| contra | |||
| d'un | |||
| d'una | |||
| d'unes | |||
| d'uns | |||
| dalt | |||
| de | |||
| del | |||
| dels | |||
| des | |||
| després | |||
| dins | |||
| dintre | |||
| donat | |||
| doncs | |||
| durant | |||
| e | |||
| eh | |||
| el | |||
| els | |||
| em | |||
| en | |||
| encara | |||
| ens | |||
| entre | |||
| érem | |||
| eren | |||
| éreu | |||
| es | |||
| és | |||
| esta | |||
| està | |||
| estàvem | |||
| estaven | |||
| estàveu | |||
| esteu | |||
| et | |||
| etc | |||
| ets | |||
| fins | |||
| fora | |||
| gairebé | |||
| ha | |||
| han | |||
| has | |||
| havia | |||
| he | |||
| hem | |||
| heu | |||
| hi | |||
| ho | |||
| i | |||
| igual | |||
| iguals | |||
| ja | |||
| l'hi | |||
| la | |||
| les | |||
| li | |||
| li'n | |||
| llavors | |||
| m'he | |||
| ma | |||
| mal | |||
| malgrat | |||
| mateix | |||
| mateixa | |||
| mateixes | |||
| mateixos | |||
| me | |||
| mentre | |||
| més | |||
| meu | |||
| meus | |||
| meva | |||
| meves | |||
| molt | |||
| molta | |||
| moltes | |||
| molts | |||
| mon | |||
| mons | |||
| n'he | |||
| n'hi | |||
| ne | |||
| ni | |||
| no | |||
| nogensmenys | |||
| només | |||
| nosaltres | |||
| nostra | |||
| nostre | |||
| nostres | |||
| o | |||
| oh | |||
| oi | |||
| on | |||
| pas | |||
| pel | |||
| pels | |||
| per | |||
| però | |||
| perquè | |||
| poc | |||
| poca | |||
| pocs | |||
| poques | |||
| potser | |||
| propi | |||
| qual | |||
| quals | |||
| quan | |||
| quant | |||
| que | |||
| què | |||
| quelcom | |||
| qui | |||
| quin | |||
| quina | |||
| quines | |||
| quins | |||
| s'ha | |||
| s'han | |||
| sa | |||
| semblant | |||
| semblants | |||
| ses | |||
| seu | |||
| seus | |||
| seva | |||
| seva | |||
| seves | |||
| si | |||
| sobre | |||
| sobretot | |||
| sóc | |||
| solament | |||
| sols | |||
| son | |||
| són | |||
| sons | |||
| sota | |||
| sou | |||
| t'ha | |||
| t'han | |||
| t'he | |||
| ta | |||
| tal | |||
| també | |||
| tampoc | |||
| tan | |||
| tant | |||
| tanta | |||
| tantes | |||
| teu | |||
| teus | |||
| teva | |||
| teves | |||
| ton | |||
| tons | |||
| tot | |||
| tota | |||
| totes | |||
| tots | |||
| un | |||
| una | |||
| unes | |||
| uns | |||
| us | |||
| va | |||
| vaig | |||
| vam | |||
| van | |||
| vas | |||
| veu | |||
| vosaltres | |||
| vostra | |||
| vostre | |||
| vostres | |||
| @@ -0,0 +1,172 @@ | |||
| a | |||
| s | |||
| k | |||
| o | |||
| i | |||
| u | |||
| v | |||
| z | |||
| dnes | |||
| cz | |||
| tímto | |||
| budeš | |||
| budem | |||
| byli | |||
| jseš | |||
| můj | |||
| svým | |||
| ta | |||
| tomto | |||
| tohle | |||
| tuto | |||
| tyto | |||
| jej | |||
| zda | |||
| proč | |||
| máte | |||
| tato | |||
| kam | |||
| tohoto | |||
| kdo | |||
| kteří | |||
| mi | |||
| nám | |||
| tom | |||
| tomuto | |||
| mít | |||
| nic | |||
| proto | |||
| kterou | |||
| byla | |||
| toho | |||
| protože | |||
| asi | |||
| ho | |||
| naši | |||
| napište | |||
| re | |||
| což | |||
| tím | |||
| takže | |||
| svých | |||
| její | |||
| svými | |||
| jste | |||
| aj | |||
| tu | |||
| tedy | |||
| teto | |||
| bylo | |||
| kde | |||
| ke | |||
| pravé | |||
| ji | |||
| nad | |||
| nejsou | |||
| či | |||
| pod | |||
| téma | |||
| mezi | |||
| přes | |||
| ty | |||
| pak | |||
| vám | |||
| ani | |||
| když | |||
| však | |||
| neg | |||
| jsem | |||
| tento | |||
| článku | |||
| články | |||
| aby | |||
| jsme | |||
| před | |||
| pta | |||
| jejich | |||
| byl | |||
| ještě | |||
| až | |||
| bez | |||
| také | |||
| pouze | |||
| první | |||
| vaše | |||
| která | |||
| nás | |||
| nový | |||
| tipy | |||
| pokud | |||
| může | |||
| strana | |||
| jeho | |||
| své | |||
| jiné | |||
| zprávy | |||
| nové | |||
| není | |||
| vás | |||
| jen | |||
| podle | |||
| zde | |||
| už | |||
| být | |||
| více | |||
| bude | |||
| již | |||
| než | |||
| který | |||
| by | |||
| které | |||
| co | |||
| nebo | |||
| ten | |||
| tak | |||
| má | |||
| při | |||
| od | |||
| po | |||
| jsou | |||
| jak | |||
| další | |||
| ale | |||
| si | |||
| se | |||
| ve | |||
| to | |||
| jako | |||
| za | |||
| zpět | |||
| ze | |||
| do | |||
| pro | |||
| je | |||
| na | |||
| atd | |||
| atp | |||
| jakmile | |||
| přičemž | |||
| já | |||
| on | |||
| ona | |||
| ono | |||
| oni | |||
| ony | |||
| my | |||
| vy | |||
| jí | |||
| ji | |||
| mě | |||
| mne | |||
| jemu | |||
| tomu | |||
| těm | |||
| těmu | |||
| němu | |||
| němuž | |||
| jehož | |||
| jíž | |||
| jelikož | |||
| jež | |||
| jakož | |||
| načež | |||
| @@ -0,0 +1,110 @@ | |||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt | |||
| | This file is distributed under the BSD License. | |||
| | See http://snowball.tartarus.org/license.php | |||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||
| | - Encoding was converted to UTF-8. | |||
| | - This notice was added. | |||
| | | |||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| | A Danish stop word list. Comments begin with vertical bar. Each stop | |||
| | word is at the start of a line. | |||
| | This is a ranked list (commonest to rarest) of stopwords derived from | |||
| | a large text sample. | |||
| og | and | |||
| i | in | |||
| jeg | I | |||
| det | that (dem. pronoun)/it (pers. pronoun) | |||
| at | that (in front of a sentence)/to (with infinitive) | |||
| en | a/an | |||
| den | it (pers. pronoun)/that (dem. pronoun) | |||
| til | to/at/for/until/against/by/of/into, more | |||
| er | present tense of "to be" | |||
| som | who, as | |||
| på | on/upon/in/on/at/to/after/of/with/for, on | |||
| de | they | |||
| med | with/by/in, along | |||
| han | he | |||
| af | of/by/from/off/for/in/with/on, off | |||
| for | at/for/to/from/by/of/ago, in front/before, because | |||
| ikke | not | |||
| der | who/which, there/those | |||
| var | past tense of "to be" | |||
| mig | me/myself | |||
| sig | oneself/himself/herself/itself/themselves | |||
| men | but | |||
| et | a/an/one, one (number), someone/somebody/one | |||
| har | present tense of "to have" | |||
| om | round/about/for/in/a, about/around/down, if | |||
| vi | we | |||
| min | my | |||
| havde | past tense of "to have" | |||
| ham | him | |||
| hun | she | |||
| nu | now | |||
| over | over/above/across/by/beyond/past/on/about, over/past | |||
| da | then, when/as/since | |||
| fra | from/off/since, off, since | |||
| du | you | |||
| ud | out | |||
| sin | his/her/its/one's | |||
| dem | them | |||
| os | us/ourselves | |||
| op | up | |||
| man | you/one | |||
| hans | his | |||
| hvor | where | |||
| eller | or | |||
| hvad | what | |||
| skal | must/shall etc. | |||
| selv | myself/youself/herself/ourselves etc., even | |||
| her | here | |||
| alle | all/everyone/everybody etc. | |||
| vil | will (verb) | |||
| blev | past tense of "to stay/to remain/to get/to become" | |||
| kunne | could | |||
| ind | in | |||
| når | when | |||
| være | present tense of "to be" | |||
| dog | however/yet/after all | |||
| noget | something | |||
| ville | would | |||
| jo | you know/you see (adv), yes | |||
| deres | their/theirs | |||
| efter | after/behind/according to/for/by/from, later/afterwards | |||
| ned | down | |||
| skulle | should | |||
| denne | this | |||
| end | than | |||
| dette | this | |||
| mit | my/mine | |||
| også | also | |||
| under | under/beneath/below/during, below/underneath | |||
| have | have | |||
| dig | you | |||
| anden | other | |||
| hende | her | |||
| mine | my | |||
| alt | everything | |||
| meget | much/very, plenty of | |||
| sit | his, her, its, one's | |||
| sine | his, her, its, one's | |||
| vor | our | |||
| mod | against | |||
| disse | these | |||
| hvis | if | |||
| din | your/yours | |||
| nogle | some | |||
| hos | by/at | |||
| blive | be/become | |||
| mange | many | |||
| ad | by/through | |||
| bliver | present tense of "to be/to become" | |||
| hendes | her/hers | |||
| været | be | |||
| thi | for (conj) | |||
| jer | you | |||
| sådan | such, like this/like that | |||
| @@ -0,0 +1,294 @@ | |||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt | |||
| | This file is distributed under the BSD License. | |||
| | See http://snowball.tartarus.org/license.php | |||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||
| | - Encoding was converted to UTF-8. | |||
| | - This notice was added. | |||
| | | |||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| | A German stop word list. Comments begin with vertical bar. Each stop | |||
| | word is at the start of a line. | |||
| | The number of forms in this list is reduced significantly by passing it | |||
| | through the German stemmer. | |||
| aber | but | |||
| alle | all | |||
| allem | |||
| allen | |||
| aller | |||
| alles | |||
| als | than, as | |||
| also | so | |||
| am | an + dem | |||
| an | at | |||
| ander | other | |||
| andere | |||
| anderem | |||
| anderen | |||
| anderer | |||
| anderes | |||
| anderm | |||
| andern | |||
| anderr | |||
| anders | |||
| auch | also | |||
| auf | on | |||
| aus | out of | |||
| bei | by | |||
| bin | am | |||
| bis | until | |||
| bist | art | |||
| da | there | |||
| damit | with it | |||
| dann | then | |||
| der | the | |||
| den | |||
| des | |||
| dem | |||
| die | |||
| das | |||
| daß | that | |||
| derselbe | the same | |||
| derselben | |||
| denselben | |||
| desselben | |||
| demselben | |||
| dieselbe | |||
| dieselben | |||
| dasselbe | |||
| dazu | to that | |||
| dein | thy | |||
| deine | |||
| deinem | |||
| deinen | |||
| deiner | |||
| deines | |||
| denn | because | |||
| derer | of those | |||
| dessen | of him | |||
| dich | thee | |||
| dir | to thee | |||
| du | thou | |||
| dies | this | |||
| diese | |||
| diesem | |||
| diesen | |||
| dieser | |||
| dieses | |||
| doch | (several meanings) | |||
| dort | (over) there | |||
| durch | through | |||
| ein | a | |||
| eine | |||
| einem | |||
| einen | |||
| einer | |||
| eines | |||
| einig | some | |||
| einige | |||
| einigem | |||
| einigen | |||
| einiger | |||
| einiges | |||
| einmal | once | |||
| er | he | |||
| ihn | him | |||
| ihm | to him | |||
| es | it | |||
| etwas | something | |||
| euer | your | |||
| eure | |||
| eurem | |||
| euren | |||
| eurer | |||
| eures | |||
| für | for | |||
| gegen | towards | |||
| gewesen | p.p. of sein | |||
| hab | have | |||
| habe | have | |||
| haben | have | |||
| hat | has | |||
| hatte | had | |||
| hatten | had | |||
| hier | here | |||
| hin | there | |||
| hinter | behind | |||
| ich | I | |||
| mich | me | |||
| mir | to me | |||
| ihr | you, to her | |||
| ihre | |||
| ihrem | |||
| ihren | |||
| ihrer | |||
| ihres | |||
| euch | to you | |||
| im | in + dem | |||
| in | in | |||
| indem | while | |||
| ins | in + das | |||
| ist | is | |||
| jede | each, every | |||
| jedem | |||
| jeden | |||
| jeder | |||
| jedes | |||
| jene | that | |||
| jenem | |||
| jenen | |||
| jener | |||
| jenes | |||
| jetzt | now | |||
| kann | can | |||
| kein | no | |||
| keine | |||
| keinem | |||
| keinen | |||
| keiner | |||
| keines | |||
| können | can | |||
| könnte | could | |||
| machen | do | |||
| man | one | |||
| manche | some, many a | |||
| manchem | |||
| manchen | |||
| mancher | |||
| manches | |||
| mein | my | |||
| meine | |||
| meinem | |||
| meinen | |||
| meiner | |||
| meines | |||
| mit | with | |||
| muss | must | |||
| musste | had to | |||
| nach | to(wards) | |||
| nicht | not | |||
| nichts | nothing | |||
| noch | still, yet | |||
| nun | now | |||
| nur | only | |||
| ob | whether | |||
| oder | or | |||
| ohne | without | |||
| sehr | very | |||
| sein | his | |||
| seine | |||
| seinem | |||
| seinen | |||
| seiner | |||
| seines | |||
| selbst | self | |||
| sich | herself | |||
| sie | they, she | |||
| ihnen | to them | |||
| sind | are | |||
| so | so | |||
| solche | such | |||
| solchem | |||
| solchen | |||
| solcher | |||
| solches | |||
| soll | shall | |||
| sollte | should | |||
| sondern | but | |||
| sonst | else | |||
| über | over | |||
| um | about, around | |||
| und | and | |||
| uns | us | |||
| unse | |||
| unsem | |||
| unsen | |||
| unser | |||
| unses | |||
| unter | under | |||
| viel | much | |||
| vom | von + dem | |||
| von | from | |||
| vor | before | |||
| während | while | |||
| war | was | |||
| waren | were | |||
| warst | wast | |||
| was | what | |||
| weg | away, off | |||
| weil | because | |||
| weiter | further | |||
| welche | which | |||
| welchem | |||
| welchen | |||
| welcher | |||
| welches | |||
| wenn | when | |||
| werde | will | |||
| werden | will | |||
| wie | how | |||
| wieder | again | |||
| will | want | |||
| wir | we | |||
| wird | will | |||
| wirst | willst | |||
| wo | where | |||
| wollen | want | |||
| wollte | wanted | |||
| würde | would | |||
| würden | would | |||
| zu | to | |||
| zum | zu + dem | |||
| zur | zu + der | |||
| zwar | indeed | |||
| zwischen | between | |||
| @@ -0,0 +1,78 @@ | |||
| # Lucene Greek Stopwords list | |||
| # Note: by default this file is used after GreekLowerCaseFilter, | |||
| # so when modifying this file use 'σ' instead of 'ς' | |||
| ο | |||
| η | |||
| το | |||
| οι | |||
| τα | |||
| του | |||
| τησ | |||
| των | |||
| τον | |||
| την | |||
| και | |||
| κι | |||
| κ | |||
| ειμαι | |||
| εισαι | |||
| ειναι | |||
| ειμαστε | |||
| ειστε | |||
| στο | |||
| στον | |||
| στη | |||
| στην | |||
| μα | |||
| αλλα | |||
| απο | |||
| για | |||
| προσ | |||
| με | |||
| σε | |||
| ωσ | |||
| παρα | |||
| αντι | |||
| κατα | |||
| μετα | |||
| θα | |||
| να | |||
| δε | |||
| δεν | |||
| μη | |||
| μην | |||
| επι | |||
| ενω | |||
| εαν | |||
| αν | |||
| τοτε | |||
| που | |||
| πωσ | |||
| ποιοσ | |||
| ποια | |||
| ποιο | |||
| ποιοι | |||
| ποιεσ | |||
| ποιων | |||
| ποιουσ | |||
| αυτοσ | |||
| αυτη | |||
| αυτο | |||
| αυτοι | |||
| αυτων | |||
| αυτουσ | |||
| αυτεσ | |||
| αυτα | |||
| εκεινοσ | |||
| εκεινη | |||
| εκεινο | |||
| εκεινοι | |||
| εκεινεσ | |||
| εκεινα | |||
| εκεινων | |||
| εκεινουσ | |||
| οπωσ | |||
| ομωσ | |||
| ισωσ | |||
| οσο | |||
| οτι | |||
| @@ -0,0 +1,54 @@ | |||
| # Licensed to the Apache Software Foundation (ASF) under one or more | |||
| # contributor license agreements. See the NOTICE file distributed with | |||
| # this work for additional information regarding copyright ownership. | |||
| # The ASF licenses this file to You under the Apache License, Version 2.0 | |||
| # (the "License"); you may not use this file except in compliance with | |||
| # the License. You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # a couple of test stopwords to test that the words are really being | |||
| # configured from this file: | |||
| stopworda | |||
| stopwordb | |||
| # Standard english stop words taken from Lucene's StopAnalyzer | |||
| a | |||
| an | |||
| and | |||
| are | |||
| as | |||
| at | |||
| be | |||
| but | |||
| by | |||
| for | |||
| if | |||
| in | |||
| into | |||
| is | |||
| it | |||
| no | |||
| not | |||
| of | |||
| on | |||
| or | |||
| such | |||
| that | |||
| the | |||
| their | |||
| then | |||
| there | |||
| these | |||
| they | |||
| this | |||
| to | |||
| was | |||
| will | |||
| with | |||
| @@ -0,0 +1,356 @@ | |||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt | |||
| | This file is distributed under the BSD License. | |||
| | See http://snowball.tartarus.org/license.php | |||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||
| | - Encoding was converted to UTF-8. | |||
| | - This notice was added. | |||
| | | |||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| | A Spanish stop word list. Comments begin with vertical bar. Each stop | |||
| | word is at the start of a line. | |||
| | The following is a ranked list (commonest to rarest) of stopwords | |||
| | deriving from a large sample of text. | |||
| | Extra words have been added at the end. | |||
| de | from, of | |||
| la | the, her | |||
| que | who, that | |||
| el | the | |||
| en | in | |||
| y | and | |||
| a | to | |||
| los | the, them | |||
| del | de + el | |||
| se | himself, from him etc | |||
| las | the, them | |||
| por | for, by, etc | |||
| un | a | |||
| para | for | |||
| con | with | |||
| no | no | |||
| una | a | |||
| su | his, her | |||
| al | a + el | |||
| | es from SER | |||
| lo | him | |||
| como | how | |||
| más | more | |||
| pero | pero | |||
| sus | su plural | |||
| le | to him, her | |||
| ya | already | |||
| o | or | |||
| | fue from SER | |||
| este | this | |||
| | ha from HABER | |||
| sí | himself etc | |||
| porque | because | |||
| esta | this | |||
| | son from SER | |||
| entre | between | |||
| | está from ESTAR | |||
| cuando | when | |||
| muy | very | |||
| sin | without | |||
| sobre | on | |||
| | ser from SER | |||
| | tiene from TENER | |||
| también | also | |||
| me | me | |||
| hasta | until | |||
| hay | there is/are | |||
| donde | where | |||
| | han from HABER | |||
| quien | whom, that | |||
| | están from ESTAR | |||
| | estado from ESTAR | |||
| desde | from | |||
| todo | all | |||
| nos | us | |||
| durante | during | |||
| | estados from ESTAR | |||
| todos | all | |||
| uno | a | |||
| les | to them | |||
| ni | nor | |||
| contra | against | |||
| otros | other | |||
| | fueron from SER | |||
| ese | that | |||
| eso | that | |||
| | había from HABER | |||
| ante | before | |||
| ellos | they | |||
| e | and (variant of y) | |||
| esto | this | |||
| mí | me | |||
| antes | before | |||
| algunos | some | |||
| qué | what? | |||
| unos | a | |||
| yo | I | |||
| otro | other | |||
| otras | other | |||
| otra | other | |||
| él | he | |||
| tanto | so much, many | |||
| esa | that | |||
| estos | these | |||
| mucho | much, many | |||
| quienes | who | |||
| nada | nothing | |||
| muchos | many | |||
| cual | who | |||
| | sea from SER | |||
| poco | few | |||
| ella | she | |||
| estar | to be | |||
| | haber from HABER | |||
| estas | these | |||
| | estaba from ESTAR | |||
| | estamos from ESTAR | |||
| algunas | some | |||
| algo | something | |||
| nosotros | we | |||
| | other forms | |||
| mi | me | |||
| mis | mi plural | |||
| tú | thou | |||
| te | thee | |||
| ti | thee | |||
| tu | thy | |||
| tus | tu plural | |||
| ellas | they | |||
| nosotras | we | |||
| vosotros | you | |||
| vosotras | you | |||
| os | you | |||
| mío | mine | |||
| mía | | |||
| míos | | |||
| mías | | |||
| tuyo | thine | |||
| tuya | | |||
| tuyos | | |||
| tuyas | | |||
| suyo | his, hers, theirs | |||
| suya | | |||
| suyos | | |||
| suyas | | |||
| nuestro | ours | |||
| nuestra | | |||
| nuestros | | |||
| nuestras | | |||
| vuestro | yours | |||
| vuestra | | |||
| vuestros | | |||
| vuestras | | |||
| esos | those | |||
| esas | those | |||
| | forms of estar, to be (not including the infinitive): | |||
| estoy | |||
| estás | |||
| está | |||
| estamos | |||
| estáis | |||
| están | |||
| esté | |||
| estés | |||
| estemos | |||
| estéis | |||
| estén | |||
| estaré | |||
| estarás | |||
| estará | |||
| estaremos | |||
| estaréis | |||
| estarán | |||
| estaría | |||
| estarías | |||
| estaríamos | |||
| estaríais | |||
| estarían | |||
| estaba | |||
| estabas | |||
| estábamos | |||
| estabais | |||
| estaban | |||
| estuve | |||
| estuviste | |||
| estuvo | |||
| estuvimos | |||
| estuvisteis | |||
| estuvieron | |||
| estuviera | |||
| estuvieras | |||
| estuviéramos | |||
| estuvierais | |||
| estuvieran | |||
| estuviese | |||
| estuvieses | |||
| estuviésemos | |||
| estuvieseis | |||
| estuviesen | |||
| estando | |||
| estado | |||
| estada | |||
| estados | |||
| estadas | |||
| estad | |||
| | forms of haber, to have (not including the infinitive): | |||
| he | |||
| has | |||
| ha | |||
| hemos | |||
| habéis | |||
| han | |||
| haya | |||
| hayas | |||
| hayamos | |||
| hayáis | |||
| hayan | |||
| habré | |||
| habrás | |||
| habrá | |||
| habremos | |||
| habréis | |||
| habrán | |||
| habría | |||
| habrías | |||
| habríamos | |||
| habríais | |||
| habrían | |||
| había | |||
| habías | |||
| habíamos | |||
| habíais | |||
| habían | |||
| hube | |||
| hubiste | |||
| hubo | |||
| hubimos | |||
| hubisteis | |||
| hubieron | |||
| hubiera | |||
| hubieras | |||
| hubiéramos | |||
| hubierais | |||
| hubieran | |||
| hubiese | |||
| hubieses | |||
| hubiésemos | |||
| hubieseis | |||
| hubiesen | |||
| habiendo | |||
| habido | |||
| habida | |||
| habidos | |||
| habidas | |||
| | forms of ser, to be (not including the infinitive): | |||
| soy | |||
| eres | |||
| es | |||
| somos | |||
| sois | |||
| son | |||
| sea | |||
| seas | |||
| seamos | |||
| seáis | |||
| sean | |||
| seré | |||
| serás | |||
| será | |||
| seremos | |||
| seréis | |||
| serán | |||
| sería | |||
| serías | |||
| seríamos | |||
| seríais | |||
| serían | |||
| era | |||
| eras | |||
| éramos | |||
| erais | |||
| eran | |||
| fui | |||
| fuiste | |||
| fue | |||
| fuimos | |||
| fuisteis | |||
| fueron | |||
| fuera | |||
| fueras | |||
| fuéramos | |||
| fuerais | |||
| fueran | |||
| fuese | |||
| fueses | |||
| fuésemos | |||
| fueseis | |||
| fuesen | |||
| siendo | |||
| sido | |||
| | sed also means 'thirst' | |||
| | forms of tener, to have (not including the infinitive): | |||
| tengo | |||
| tienes | |||
| tiene | |||
| tenemos | |||
| tenéis | |||
| tienen | |||
| tenga | |||
| tengas | |||
| tengamos | |||
| tengáis | |||
| tengan | |||
| tendré | |||
| tendrás | |||
| tendrá | |||
| tendremos | |||
| tendréis | |||
| tendrán | |||
| tendría | |||
| tendrías | |||
| tendríamos | |||
| tendríais | |||
| tendrían | |||
| tenía | |||
| tenías | |||
| teníamos | |||
| teníais | |||
| tenían | |||
| tuve | |||
| tuviste | |||
| tuvo | |||
| tuvimos | |||
| tuvisteis | |||
| tuvieron | |||
| tuviera | |||
| tuvieras | |||
| tuviéramos | |||
| tuvierais | |||
| tuvieran | |||
| tuviese | |||
| tuvieses | |||
| tuviésemos | |||
| tuvieseis | |||
| tuviesen | |||
| teniendo | |||
| tenido | |||
| tenida | |||
| tenidos | |||
| tenidas | |||
| tened | |||
| @@ -0,0 +1,99 @@ | |||
| # example set of basque stopwords | |||
| al | |||
| anitz | |||
| arabera | |||
| asko | |||
| baina | |||
| bat | |||
| batean | |||
| batek | |||
| bati | |||
| batzuei | |||
| batzuek | |||
| batzuetan | |||
| batzuk | |||
| bera | |||
| beraiek | |||
| berau | |||
| berauek | |||
| bere | |||
| berori | |||
| beroriek | |||
| beste | |||
| bezala | |||
| da | |||
| dago | |||
| dira | |||
| ditu | |||
| du | |||
| dute | |||
| edo | |||
| egin | |||
| ere | |||
| eta | |||
| eurak | |||
| ez | |||
| gainera | |||
| gu | |||
| gutxi | |||
| guzti | |||
| haiei | |||
| haiek | |||
| haietan | |||
| hainbeste | |||
| hala | |||
| han | |||
| handik | |||
| hango | |||
| hara | |||
| hari | |||
| hark | |||
| hartan | |||
| hau | |||
| hauei | |||
| hauek | |||
| hauetan | |||
| hemen | |||
| hemendik | |||
| hemengo | |||
| hi | |||
| hona | |||
| honek | |||
| honela | |||
| honetan | |||
| honi | |||
| hor | |||
| hori | |||
| horiei | |||
| horiek | |||
| horietan | |||
| horko | |||
| horra | |||
| horrek | |||
| horrela | |||
| horretan | |||
| horri | |||
| hortik | |||
| hura | |||
| izan | |||
| ni | |||
| noiz | |||
| nola | |||
| non | |||
| nondik | |||
| nongo | |||
| nor | |||
| nora | |||
| ze | |||
| zein | |||
| zen | |||
| zenbait | |||
| zenbat | |||
| zer | |||
| zergatik | |||
| ziren | |||
| zituen | |||
| zu | |||
| zuek | |||
| zuen | |||
| zuten | |||
| @@ -0,0 +1,313 @@ | |||
| # This file was created by Jacques Savoy and is distributed under the BSD license. | |||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | |||
| # Also see http://www.opensource.org/licenses/bsd-license.html | |||
| # Note: by default this file is used after normalization, so when adding entries | |||
| # to this file, use the arabic 'ي' instead of 'ی' | |||
| انان | |||
| نداشته | |||
| سراسر | |||
| خياه | |||
| ايشان | |||
| وي | |||
| تاكنون | |||
| بيشتري | |||
| دوم | |||
| پس | |||
| ناشي | |||
| وگو | |||
| يا | |||
| داشتند | |||
| سپس | |||
| هنگام | |||
| هرگز | |||
| پنج | |||
| نشان | |||
| امسال | |||
| ديگر | |||
| گروهي | |||
| شدند | |||
| چطور | |||
| ده | |||
| و | |||
| دو | |||
| نخستين | |||
| ولي | |||
| چرا | |||
| چه | |||
| وسط | |||
| ه | |||
| كدام | |||
| قابل | |||
| يك | |||
| رفت | |||
| هفت | |||
| همچنين | |||
| در | |||
| هزار | |||
| بله | |||
| بلي | |||
| شايد | |||
| اما | |||
| شناسي | |||
| گرفته | |||
| دهد | |||
| داشته | |||
| دانست | |||
| داشتن | |||
| خواهيم | |||
| ميليارد | |||
| وقتيكه | |||
| امد | |||
| خواهد | |||
| جز | |||
| اورده | |||
| شده | |||
| بلكه | |||
| خدمات | |||
| شدن | |||
| برخي | |||
| نبود | |||
| بسياري | |||
| جلوگيري | |||
| حق | |||
| كردند | |||
| نوعي | |||
| بعري | |||
| نكرده | |||
| نظير | |||
| نبايد | |||
| بوده | |||
| بودن | |||
| داد | |||
| اورد | |||
| هست | |||
| جايي | |||
| شود | |||
| دنبال | |||
| داده | |||
| بايد | |||
| سابق | |||
| هيچ | |||
| همان | |||
| انجا | |||
| كمتر | |||
| كجاست | |||
| گردد | |||
| كسي | |||
| تر | |||
| مردم | |||
| تان | |||
| دادن | |||
| بودند | |||
| سري | |||
| جدا | |||
| ندارند | |||
| مگر | |||
| يكديگر | |||
| دارد | |||
| دهند | |||
| بنابراين | |||
| هنگامي | |||
| سمت | |||
| جا | |||
| انچه | |||
| خود | |||
| دادند | |||
| زياد | |||
| دارند | |||
| اثر | |||
| بدون | |||
| بهترين | |||
| بيشتر | |||
| البته | |||
| به | |||
| براساس | |||
| بيرون | |||
| كرد | |||
| بعضي | |||
| گرفت | |||
| توي | |||
| اي | |||
| ميليون | |||
| او | |||
| جريان | |||
| تول | |||
| بر | |||
| مانند | |||
| برابر | |||
| باشيم | |||
| مدتي | |||
| گويند | |||
| اكنون | |||
| تا | |||
| تنها | |||
| جديد | |||
| چند | |||
| بي | |||
| نشده | |||
| كردن | |||
| كردم | |||
| گويد | |||
| كرده | |||
| كنيم | |||
| نمي | |||
| نزد | |||
| روي | |||
| قصد | |||
| فقط | |||
| بالاي | |||
| ديگران | |||
| اين | |||
| ديروز | |||
| توسط | |||
| سوم | |||
| ايم | |||
| دانند | |||
| سوي | |||
| استفاده | |||
| شما | |||
| كنار | |||
| داريم | |||
| ساخته | |||
| طور | |||
| امده | |||
| رفته | |||
| نخست | |||
| بيست | |||
| نزديك | |||
| طي | |||
| كنيد | |||
| از | |||
| انها | |||
| تمامي | |||
| داشت | |||
| يكي | |||
| طريق | |||
| اش | |||
| چيست | |||
| روب | |||
| نمايد | |||
| گفت | |||
| چندين | |||
| چيزي | |||
| تواند | |||
| ام | |||
| ايا | |||
| با | |||
| ان | |||
| ايد | |||
| ترين | |||
| اينكه | |||
| ديگري | |||
| راه | |||
| هايي | |||
| بروز | |||
| همچنان | |||
| پاعين | |||
| كس | |||
| حدود | |||
| مختلف | |||
| مقابل | |||
| چيز | |||
| گيرد | |||
| ندارد | |||
| ضد | |||
| همچون | |||
| سازي | |||
| شان | |||
| مورد | |||
| باره | |||
| مرسي | |||
| خويش | |||
| برخوردار | |||
| چون | |||
| خارج | |||
| شش | |||
| هنوز | |||
| تحت | |||
| ضمن | |||
| هستيم | |||
| گفته | |||
| فكر | |||
| بسيار | |||
| پيش | |||
| براي | |||
| روزهاي | |||
| انكه | |||
| نخواهد | |||
| بالا | |||
| كل | |||
| وقتي | |||
| كي | |||
| چنين | |||
| كه | |||
| گيري | |||
| نيست | |||
| است | |||
| كجا | |||
| كند | |||
| نيز | |||
| يابد | |||
| بندي | |||
| حتي | |||
| توانند | |||
| عقب | |||
| خواست | |||
| كنند | |||
| بين | |||
| تمام | |||
| همه | |||
| ما | |||
| باشند | |||
| مثل | |||
| شد | |||
| اري | |||
| باشد | |||
| اره | |||
| طبق | |||
| بعد | |||
| اگر | |||
| صورت | |||
| غير | |||
| جاي | |||
| بيش | |||
| ريزي | |||
| اند | |||
| زيرا | |||
| چگونه | |||
| بار | |||
| لطفا | |||
| مي | |||
| درباره | |||
| من | |||
| ديده | |||
| همين | |||
| گذاري | |||
| برداري | |||
| علت | |||
| گذاشته | |||
| هم | |||
| فوق | |||
| نه | |||
| ها | |||
| شوند | |||
| اباد | |||
| همواره | |||
| هر | |||
| اول | |||
| خواهند | |||
| چهار | |||
| نام | |||
| امروز | |||
| مان | |||
| هاي | |||
| قبل | |||
| كنم | |||
| سعي | |||
| تازه | |||
| را | |||
| هستند | |||
| زير | |||
| جلوي | |||
| عنوان | |||
| بود | |||
| @@ -0,0 +1,97 @@ | |||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt | |||
| | This file is distributed under the BSD License. | |||
| | See http://snowball.tartarus.org/license.php | |||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||
| | - Encoding was converted to UTF-8. | |||
| | - This notice was added. | |||
| | | |||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| | forms of BE | |||
| olla | |||
| olen | |||
| olet | |||
| on | |||
| olemme | |||
| olette | |||
| ovat | |||
| ole | negative form | |||
| oli | |||
| olisi | |||
| olisit | |||
| olisin | |||
| olisimme | |||
| olisitte | |||
| olisivat | |||
| olit | |||
| olin | |||
| olimme | |||
| olitte | |||
| olivat | |||
| ollut | |||
| olleet | |||
| en | negation | |||
| et | |||
| ei | |||
| emme | |||
| ette | |||
| eivät | |||
| |Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans | |||
| minä minun minut minua minussa minusta minuun minulla minulta minulle | I | |||
| sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you | |||
| hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she | |||
| me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we | |||
| te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you | |||
| he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they | |||
| tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this | |||
| tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that | |||
| se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it | |||
| nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these | |||
| nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those | |||
| ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they | |||
| kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who | |||
| ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) | |||
| mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what | |||
| mitkä | (pl) | |||
| joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which | |||
| jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) | |||
| | conjunctions | |||
| että | that | |||
| ja | and | |||
| jos | if | |||
| koska | because | |||
| kuin | than | |||
| mutta | but | |||
| niin | so | |||
| sekä | and | |||
| sillä | for | |||
| tai | or | |||
| vaan | but | |||
| vai | or | |||
| vaikka | although | |||
| | prepositions | |||
| kanssa | with | |||
| mukaan | according to | |||
| noin | about | |||
| poikki | across | |||
| yli | over, across | |||
| | other | |||
| kun | when | |||
| niin | so | |||
| nyt | now | |||
| itse | self | |||
| @@ -0,0 +1,186 @@ | |||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt | |||
| | This file is distributed under the BSD License. | |||
| | See http://snowball.tartarus.org/license.php | |||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||
| | - Encoding was converted to UTF-8. | |||
| | - This notice was added. | |||
| | | |||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| | A French stop word list. Comments begin with vertical bar. Each stop | |||
| | word is at the start of a line. | |||
| au | a + le | |||
| aux | a + les | |||
| avec | with | |||
| ce | this | |||
| ces | these | |||
| dans | with | |||
| de | of | |||
| des | de + les | |||
| du | de + le | |||
| elle | she | |||
| en | `of them' etc | |||
| et | and | |||
| eux | them | |||
| il | he | |||
| je | I | |||
| la | the | |||
| le | the | |||
| leur | their | |||
| lui | him | |||
| ma | my (fem) | |||
| mais | but | |||
| me | me | |||
| même | same; as in moi-même (myself) etc | |||
| mes | my (pl) | |||
| moi | me | |||
| mon | my (masc) | |||
| ne | not | |||
| nos | our (pl) | |||
| notre | our | |||
| nous | we | |||
| on | one | |||
| ou | or | |||
| par | by | |||
| pas | not | |||
| pour | for | |||
| qu | que before vowel | |||
| que | that | |||
| qui | who | |||
| sa | his, her (fem) | |||
| se | oneself | |||
| ses | his (pl) | |||
| son | his, her (masc) | |||
| sur | on | |||
| ta | thy (fem) | |||
| te | thee | |||
| tes | thy (pl) | |||
| toi | thee | |||
| ton | thy (masc) | |||
| tu | thou | |||
| un | a | |||
| une | a | |||
| vos | your (pl) | |||
| votre | your | |||
| vous | you | |||
| | single letter forms | |||
| c | c' | |||
| d | d' | |||
| j | j' | |||
| l | l' | |||
| à | to, at | |||
| m | m' | |||
| n | n' | |||
| s | s' | |||
| t | t' | |||
| y | there | |||
| | forms of être (not including the infinitive): | |||
| été | |||
| étée | |||
| étées | |||
| étés | |||
| étant | |||
| suis | |||
| es | |||
| est | |||
| sommes | |||
| êtes | |||
| sont | |||
| serai | |||
| seras | |||
| sera | |||
| serons | |||
| serez | |||
| seront | |||
| serais | |||
| serait | |||
| serions | |||
| seriez | |||
| seraient | |||
| étais | |||
| était | |||
| étions | |||
| étiez | |||
| étaient | |||
| fus | |||
| fut | |||
| fûmes | |||
| fûtes | |||
| furent | |||
| sois | |||
| soit | |||
| soyons | |||
| soyez | |||
| soient | |||
| fusse | |||
| fusses | |||
| fût | |||
| fussions | |||
| fussiez | |||
| fussent | |||
| | forms of avoir (not including the infinitive): | |||
| ayant | |||
| eu | |||
| eue | |||
| eues | |||
| eus | |||
| ai | |||
| as | |||
| avons | |||
| avez | |||
| ont | |||
| aurai | |||
| auras | |||
| aura | |||
| aurons | |||
| aurez | |||
| auront | |||
| aurais | |||
| aurait | |||
| aurions | |||
| auriez | |||
| auraient | |||
| avais | |||
| avait | |||
| avions | |||
| aviez | |||
| avaient | |||
| eut | |||
| eûmes | |||
| eûtes | |||
| eurent | |||
| aie | |||
| aies | |||
| ait | |||
| ayons | |||
| ayez | |||
| aient | |||
| eusse | |||
| eusses | |||
| eût | |||
| eussions | |||
| eussiez | |||
| eussent | |||
| | Later additions (from Jean-Christophe Deschamps) | |||
| ceci | this | |||
| cela | that | |||
| celà | that | |||
| cet | this | |||
| cette | this | |||
| ici | here | |||
| ils | they | |||
| les | the (pl) | |||
| leurs | their (pl) | |||
| quel | which | |||
| quels | which | |||
| quelle | which | |||
| quelles | which | |||
| sans | without | |||
| soi | oneself | |||
| @@ -0,0 +1,110 @@ | |||
| a | |||
| ach | |||
| ag | |||
| agus | |||
| an | |||
| aon | |||
| ar | |||
| arna | |||
| as | |||
| b' | |||
| ba | |||
| beirt | |||
| bhúr | |||
| caoga | |||
| ceathair | |||
| ceathrar | |||
| chomh | |||
| chtó | |||
| chuig | |||
| chun | |||
| cois | |||
| céad | |||
| cúig | |||
| cúigear | |||
| d' | |||
| daichead | |||
| dar | |||
| de | |||
| deich | |||
| deichniúr | |||
| den | |||
| dhá | |||
| do | |||
| don | |||
| dtí | |||
| dá | |||
| dár | |||
| dó | |||
| faoi | |||
| faoin | |||
| faoina | |||
| faoinár | |||
| fara | |||
| fiche | |||
| gach | |||
| gan | |||
| go | |||
| gur | |||
| haon | |||
| hocht | |||
| i | |||
| iad | |||
| idir | |||
| in | |||
| ina | |||
| ins | |||
| inár | |||
| is | |||
| le | |||
| leis | |||
| lena | |||
| lenár | |||
| m' | |||
| mar | |||
| mo | |||
| mé | |||
| na | |||
| nach | |||
| naoi | |||
| naonúr | |||
| ná | |||
| ní | |||
| níor | |||
| nó | |||
| nócha | |||
| ocht | |||
| ochtar | |||
| os | |||
| roimh | |||
| sa | |||
| seacht | |||
| seachtar | |||
| seachtó | |||
| seasca | |||
| seisear | |||
| siad | |||
| sibh | |||
| sinn | |||
| sna | |||
| sé | |||
| sí | |||
| tar | |||
| thar | |||
| thú | |||
| triúr | |||
| trí | |||
| trína | |||
| trínár | |||
| tríocha | |||
| tú | |||
| um | |||
| ár | |||
| é | |||
| éis | |||
| í | |||
| ó | |||
| ón | |||
| óna | |||
| ónár | |||
| @@ -0,0 +1,161 @@ | |||
| # Galician stopwords | |||
| a | |||
| aínda | |||
| alí | |||
| aquel | |||
| aquela | |||
| aquelas | |||
| aqueles | |||
| aquilo | |||
| aquí | |||
| ao | |||
| aos | |||
| as | |||
| así | |||
| á | |||
| ben | |||
| cando | |||
| che | |||
| co | |||
| coa | |||
| comigo | |||
| con | |||
| connosco | |||
| contigo | |||
| convosco | |||
| coas | |||
| cos | |||
| cun | |||
| cuns | |||
| cunha | |||
| cunhas | |||
| da | |||
| dalgunha | |||
| dalgunhas | |||
| dalgún | |||
| dalgúns | |||
| das | |||
| de | |||
| del | |||
| dela | |||
| delas | |||
| deles | |||
| desde | |||
| deste | |||
| do | |||
| dos | |||
| dun | |||
| duns | |||
| dunha | |||
| dunhas | |||
| e | |||
| el | |||
| ela | |||
| elas | |||
| eles | |||
| en | |||
| era | |||
| eran | |||
| esa | |||
| esas | |||
| ese | |||
| eses | |||
| esta | |||
| estar | |||
| estaba | |||
| está | |||
| están | |||
| este | |||
| estes | |||
| estiven | |||
| estou | |||
| eu | |||
| é | |||
| facer | |||
| foi | |||
| foron | |||
| fun | |||
| había | |||
| hai | |||
| iso | |||
| isto | |||
| la | |||
| las | |||
| lle | |||
| lles | |||
| lo | |||
| los | |||
| mais | |||
| me | |||
| meu | |||
| meus | |||
| min | |||
| miña | |||
| miñas | |||
| moi | |||
| na | |||
| nas | |||
| neste | |||
| nin | |||
| no | |||
| non | |||
| nos | |||
| nosa | |||
| nosas | |||
| noso | |||
| nosos | |||
| nós | |||
| nun | |||
| nunha | |||
| nuns | |||
| nunhas | |||
| o | |||
| os | |||
| ou | |||
| ó | |||
| ós | |||
| para | |||
| pero | |||
| pode | |||
| pois | |||
| pola | |||
| polas | |||
| polo | |||
| polos | |||
| por | |||
| que | |||
| se | |||
| senón | |||
| ser | |||
| seu | |||
| seus | |||
| sexa | |||
| sido | |||
| sobre | |||
| súa | |||
| súas | |||
| tamén | |||
| tan | |||
| te | |||
| ten | |||
| teñen | |||
| teño | |||
| ter | |||
| teu | |||
| teus | |||
| ti | |||
| tido | |||
| tiña | |||
| tiven | |||
| túa | |||
| túas | |||
| un | |||
| unha | |||
| unhas | |||
| uns | |||
| vos | |||
| vosa | |||
| vosas | |||
| voso | |||
| vosos | |||
| vós | |||
| @@ -0,0 +1,235 @@ | |||
| # Also see http://www.opensource.org/licenses/bsd-license.html | |||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | |||
| # This file was created by Jacques Savoy and is distributed under the BSD license. | |||
| # Note: by default this file also contains forms normalized by HindiNormalizer | |||
| # for spelling variation (see section below), such that it can be used whether or | |||
| # not you enable that feature. When adding additional entries to this list, | |||
| # please add the normalized form as well. | |||
| अंदर | |||
| अत | |||
| अपना | |||
| अपनी | |||
| अपने | |||
| अभी | |||
| आदि | |||
| आप | |||
| इत्यादि | |||
| इन | |||
| इनका | |||
| इन्हीं | |||
| इन्हें | |||
| इन्हों | |||
| इस | |||
| इसका | |||
| इसकी | |||
| इसके | |||
| इसमें | |||
| इसी | |||
| इसे | |||
| उन | |||
| उनका | |||
| उनकी | |||
| उनके | |||
| उनको | |||
| उन्हीं | |||
| उन्हें | |||
| उन्हों | |||
| उस | |||
| उसके | |||
| उसी | |||
| उसे | |||
| एक | |||
| एवं | |||
| एस | |||
| ऐसे | |||
| और | |||
| कई | |||
| कर | |||
| करता | |||
| करते | |||
| करना | |||
| करने | |||
| करें | |||
| कहते | |||
| कहा | |||
| का | |||
| काफ़ी | |||
| कि | |||
| कितना | |||
| किन्हें | |||
| किन्हों | |||
| किया | |||
| किर | |||
| किस | |||
| किसी | |||
| किसे | |||
| की | |||
| कुछ | |||
| कुल | |||
| के | |||
| को | |||
| कोई | |||
| कौन | |||
| कौनसा | |||
| गया | |||
| घर | |||
| जब | |||
| जहाँ | |||
| जा | |||
| जितना | |||
| जिन | |||
| जिन्हें | |||
| जिन्हों | |||
| जिस | |||
| जिसे | |||
| जीधर | |||
| जैसा | |||
| जैसे | |||
| जो | |||
| तक | |||
| तब | |||
| तरह | |||
| तिन | |||
| तिन्हें | |||
| तिन्हों | |||
| तिस | |||
| तिसे | |||
| तो | |||
| था | |||
| थी | |||
| थे | |||
| दबारा | |||
| दिया | |||
| दुसरा | |||
| दूसरे | |||
| दो | |||
| द्वारा | |||
| न | |||
| नहीं | |||
| ना | |||
| निहायत | |||
| नीचे | |||
| ने | |||
| पर | |||
| पर | |||
| पहले | |||
| पूरा | |||
| पे | |||
| फिर | |||
| बनी | |||
| बही | |||
| बहुत | |||
| बाद | |||
| बाला | |||
| बिलकुल | |||
| भी | |||
| भीतर | |||
| मगर | |||
| मानो | |||
| मे | |||
| में | |||
| यदि | |||
| यह | |||
| यहाँ | |||
| यही | |||
| या | |||
| यिह | |||
| ये | |||
| रखें | |||
| रहा | |||
| रहे | |||
| ऱ्वासा | |||
| लिए | |||
| लिये | |||
| लेकिन | |||
| व | |||
| वर्ग | |||
| वह | |||
| वह | |||
| वहाँ | |||
| वहीं | |||
| वाले | |||
| वुह | |||
| वे | |||
| वग़ैरह | |||
| संग | |||
| सकता | |||
| सकते | |||
| सबसे | |||
| सभी | |||
| साथ | |||
| साबुत | |||
| साभ | |||
| सारा | |||
| से | |||
| सो | |||
| ही | |||
| हुआ | |||
| हुई | |||
| हुए | |||
| है | |||
| हैं | |||
| हो | |||
| होता | |||
| होती | |||
| होते | |||
| होना | |||
| होने | |||
| # additional normalized forms of the above | |||
| अपनि | |||
| जेसे | |||
| होति | |||
| सभि | |||
| तिंहों | |||
| इंहों | |||
| दवारा | |||
| इसि | |||
| किंहें | |||
| थि | |||
| उंहों | |||
| ओर | |||
| जिंहें | |||
| वहिं | |||
| अभि | |||
| बनि | |||
| हि | |||
| उंहिं | |||
| उंहें | |||
| हें | |||
| वगेरह | |||
| एसे | |||
| रवासा | |||
| कोन | |||
| निचे | |||
| काफि | |||
| उसि | |||
| पुरा | |||
| भितर | |||
| हे | |||
| बहि | |||
| वहां | |||
| कोइ | |||
| यहां | |||
| जिंहों | |||
| तिंहें | |||
| किसि | |||
| कइ | |||
| यहि | |||
| इंहिं | |||
| जिधर | |||
| इंहें | |||
| अदि | |||
| इतयादि | |||
| हुइ | |||
| कोनसा | |||
| इसकि | |||
| दुसरे | |||
| जहां | |||
| अप | |||
| किंहों | |||
| उनकि | |||
| भि | |||
| वरग | |||
| हुअ | |||
| जेसा | |||
| नहिं | |||
| @@ -0,0 +1,211 @@ | |||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt | |||
| | This file is distributed under the BSD License. | |||
| | See http://snowball.tartarus.org/license.php | |||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||
| | - Encoding was converted to UTF-8. | |||
| | - This notice was added. | |||
| | | |||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| | Hungarian stop word list | |||
| | prepared by Anna Tordai | |||
| a | |||
| ahogy | |||
| ahol | |||
| aki | |||
| akik | |||
| akkor | |||
| alatt | |||
| által | |||
| általában | |||
| amely | |||
| amelyek | |||
| amelyekben | |||
| amelyeket | |||
| amelyet | |||
| amelynek | |||
| ami | |||
| amit | |||
| amolyan | |||
| amíg | |||
| amikor | |||
| át | |||
| abban | |||
| ahhoz | |||
| annak | |||
| arra | |||
| arról | |||
| az | |||
| azok | |||
| azon | |||
| azt | |||
| azzal | |||
| azért | |||
| aztán | |||
| azután | |||
| azonban | |||
| bár | |||
| be | |||
| belül | |||
| benne | |||
| cikk | |||
| cikkek | |||
| cikkeket | |||
| csak | |||
| de | |||
| e | |||
| eddig | |||
| egész | |||
| egy | |||
| egyes | |||
| egyetlen | |||
| egyéb | |||
| egyik | |||
| egyre | |||
| ekkor | |||
| el | |||
| elég | |||
| ellen | |||
| elő | |||
| először | |||
| előtt | |||
| első | |||
| én | |||
| éppen | |||
| ebben | |||
| ehhez | |||
| emilyen | |||
| ennek | |||
| erre | |||
| ez | |||
| ezt | |||
| ezek | |||
| ezen | |||
| ezzel | |||
| ezért | |||
| és | |||
| fel | |||
| felé | |||
| hanem | |||
| hiszen | |||
| hogy | |||
| hogyan | |||
| igen | |||
| így | |||
| illetve | |||
| ill. | |||
| ill | |||
| ilyen | |||
| ilyenkor | |||
| ison | |||
| ismét | |||
| itt | |||
| jó | |||
| jól | |||
| jobban | |||
| kell | |||
| kellett | |||
| keresztül | |||
| keressünk | |||
| ki | |||
| kívül | |||
| között | |||
| közül | |||
| legalább | |||
| lehet | |||
| lehetett | |||
| legyen | |||
| lenne | |||
| lenni | |||
| lesz | |||
| lett | |||
| maga | |||
| magát | |||
| majd | |||
| majd | |||
| már | |||
| más | |||
| másik | |||
| meg | |||
| még | |||
| mellett | |||
| mert | |||
| mely | |||
| melyek | |||
| mi | |||
| mit | |||
| míg | |||
| miért | |||
| milyen | |||
| mikor | |||
| minden | |||
| mindent | |||
| mindenki | |||
| mindig | |||
| mint | |||
| mintha | |||
| mivel | |||
| most | |||
| nagy | |||
| nagyobb | |||
| nagyon | |||
| ne | |||
| néha | |||
| nekem | |||
| neki | |||
| nem | |||
| néhány | |||
| nélkül | |||
| nincs | |||
| olyan | |||
| ott | |||
| össze | |||
| ő | |||
| ők | |||
| őket | |||
| pedig | |||
| persze | |||
| rá | |||
| s | |||
| saját | |||
| sem | |||
| semmi | |||
| sok | |||
| sokat | |||
| sokkal | |||
| számára | |||
| szemben | |||
| szerint | |||
| szinte | |||
| talán | |||
| tehát | |||
| teljes | |||
| tovább | |||
| továbbá | |||
| több | |||
| úgy | |||
| ugyanis | |||
| új | |||
| újabb | |||
| újra | |||
| után | |||
| utána | |||
| utolsó | |||
| vagy | |||
| vagyis | |||
| valaki | |||
| valami | |||
| valamint | |||
| való | |||
| vagyok | |||
| van | |||
| vannak | |||
| volt | |||
| voltam | |||
| voltak | |||
| voltunk | |||
| vissza | |||
| vele | |||
| viszont | |||
| volna | |||
| @@ -0,0 +1,46 @@ | |||
| # example set of Armenian stopwords. | |||
| այդ | |||
| այլ | |||
| այն | |||
| այս | |||
| դու | |||
| դուք | |||
| եմ | |||
| են | |||
| ենք | |||
| ես | |||
| եք | |||
| է | |||
| էի | |||
| էին | |||
| էինք | |||
| էիր | |||
| էիք | |||
| էր | |||
| ըստ | |||
| թ | |||
| ի | |||
| ին | |||
| իսկ | |||
| իր | |||
| կամ | |||
| համար | |||
| հետ | |||
| հետո | |||
| մենք | |||
| մեջ | |||
| մի | |||
| ն | |||
| նա | |||
| նաև | |||
| նրա | |||
| նրանք | |||
| որ | |||
| որը | |||
| որոնք | |||
| որպես | |||
| ու | |||
| ում | |||
| պիտի | |||
| վրա | |||
| և | |||
| @@ -0,0 +1,359 @@ | |||
| # from appendix D of: A Study of Stemming Effects on Information | |||
| # Retrieval in Bahasa Indonesia | |||
| ada | |||
| adanya | |||
| adalah | |||
| adapun | |||
| agak | |||
| agaknya | |||
| agar | |||
| akan | |||
| akankah | |||
| akhirnya | |||
| aku | |||
| akulah | |||
| amat | |||
| amatlah | |||
| anda | |||
| andalah | |||
| antar | |||
| diantaranya | |||
| antara | |||
| antaranya | |||
| diantara | |||
| apa | |||
| apaan | |||
| mengapa | |||
| apabila | |||
| apakah | |||
| apalagi | |||
| apatah | |||
| atau | |||
| ataukah | |||
| ataupun | |||
| bagai | |||
| bagaikan | |||
| sebagai | |||
| sebagainya | |||
| bagaimana | |||
| bagaimanapun | |||
| sebagaimana | |||
| bagaimanakah | |||
| bagi | |||
| bahkan | |||
| bahwa | |||
| bahwasanya | |||
| sebaliknya | |||
| banyak | |||
| sebanyak | |||
| beberapa | |||
| seberapa | |||
| begini | |||
| beginian | |||
| beginikah | |||
| beginilah | |||
| sebegini | |||
| begitu | |||
| begitukah | |||
| begitulah | |||
| begitupun | |||
| sebegitu | |||
| belum | |||
| belumlah | |||
| sebelum | |||
| sebelumnya | |||
| sebenarnya | |||
| berapa | |||
| berapakah | |||
| berapalah | |||
| berapapun | |||
| betulkah | |||
| sebetulnya | |||
| biasa | |||
| biasanya | |||
| bila | |||
| bilakah | |||
| bisa | |||
| bisakah | |||
| sebisanya | |||
| boleh | |||
| bolehkah | |||
| bolehlah | |||
| buat | |||
| bukan | |||
| bukankah | |||
| bukanlah | |||
| bukannya | |||
| cuma | |||
| percuma | |||
| dahulu | |||
| dalam | |||
| dan | |||
| dapat | |||
| dari | |||
| daripada | |||
| dekat | |||
| demi | |||
| demikian | |||
| demikianlah | |||
| sedemikian | |||
| dengan | |||
| depan | |||
| di | |||
| dia | |||
| dialah | |||
| dini | |||
| diri | |||
| dirinya | |||
| terdiri | |||
| dong | |||
| dulu | |||
| enggak | |||
| enggaknya | |||
| entah | |||
| entahlah | |||
| terhadap | |||
| terhadapnya | |||
| hal | |||
| hampir | |||
| hanya | |||
| hanyalah | |||
| harus | |||
| haruslah | |||
| harusnya | |||
| seharusnya | |||
| hendak | |||
| hendaklah | |||
| hendaknya | |||
| hingga | |||
| sehingga | |||
| ia | |||
| ialah | |||
| ibarat | |||
| ingin | |||
| inginkah | |||
| inginkan | |||
| ini | |||
| inikah | |||
| inilah | |||
| itu | |||
| itukah | |||
| itulah | |||
| jangan | |||
| jangankan | |||
| janganlah | |||
| jika | |||
| jikalau | |||
| juga | |||
| justru | |||
| kala | |||
| kalau | |||
| kalaulah | |||
| kalaupun | |||
| kalian | |||
| kami | |||
| kamilah | |||
| kamu | |||
| kamulah | |||
| kan | |||
| kapan | |||
| kapankah | |||
| kapanpun | |||
| dikarenakan | |||
| karena | |||
| karenanya | |||
| ke | |||
| kecil | |||
| kemudian | |||
| kenapa | |||
| kepada | |||
| kepadanya | |||
| ketika | |||
| seketika | |||
| khususnya | |||
| kini | |||
| kinilah | |||
| kiranya | |||
| sekiranya | |||
| kita | |||
| kitalah | |||
| kok | |||
| lagi | |||
| lagian | |||
| selagi | |||
| lah | |||
| lain | |||
| lainnya | |||
| melainkan | |||
| selaku | |||
| lalu | |||
| melalui | |||
| terlalu | |||
| lama | |||
| lamanya | |||
| selama | |||
| selama | |||
| selamanya | |||
| lebih | |||
| terlebih | |||
| bermacam | |||
| macam | |||
| semacam | |||
| maka | |||
| makanya | |||
| makin | |||
| malah | |||
| malahan | |||
| mampu | |||
| mampukah | |||
| mana | |||
| manakala | |||
| manalagi | |||
| masih | |||
| masihkah | |||
| semasih | |||
| masing | |||
| mau | |||
| maupun | |||
| semaunya | |||
| memang | |||
| mereka | |||
| merekalah | |||
| meski | |||
| meskipun | |||
| semula | |||
| mungkin | |||
| mungkinkah | |||
| nah | |||
| namun | |||
| nanti | |||
| nantinya | |||
| nyaris | |||
| oleh | |||
| olehnya | |||
| seorang | |||
| seseorang | |||
| pada | |||
| padanya | |||
| padahal | |||
| paling | |||
| sepanjang | |||
| pantas | |||
| sepantasnya | |||
| sepantasnyalah | |||
| para | |||
| pasti | |||
| pastilah | |||
| per | |||
| pernah | |||
| pula | |||
| pun | |||
| merupakan | |||
| rupanya | |||
| serupa | |||
| saat | |||
| saatnya | |||
| sesaat | |||
| saja | |||
| sajalah | |||
| saling | |||
| bersama | |||
| sama | |||
| sesama | |||
| sambil | |||
| sampai | |||
| sana | |||
| sangat | |||
| sangatlah | |||
| saya | |||
| sayalah | |||
| se | |||
| sebab | |||
| sebabnya | |||
| sebuah | |||
| tersebut | |||
| tersebutlah | |||
| sedang | |||
| sedangkan | |||
| sedikit | |||
| sedikitnya | |||
| segala | |||
| segalanya | |||
| segera | |||
| sesegera | |||
| sejak | |||
| sejenak | |||
| sekali | |||
| sekalian | |||
| sekalipun | |||
| sesekali | |||
| sekaligus | |||
| sekarang | |||
| sekarang | |||
| sekitar | |||
| sekitarnya | |||
| sela | |||
| selain | |||
| selalu | |||
| seluruh | |||
| seluruhnya | |||
| semakin | |||
| sementara | |||
| sempat | |||
| semua | |||
| semuanya | |||
| sendiri | |||
| sendirinya | |||
| seolah | |||
| seperti | |||
| sepertinya | |||
| sering | |||
| seringnya | |||
| serta | |||
| siapa | |||
| siapakah | |||
| siapapun | |||
| disini | |||
| disinilah | |||
| sini | |||
| sinilah | |||
| sesuatu | |||
| sesuatunya | |||
| suatu | |||
| sesudah | |||
| sesudahnya | |||
| sudah | |||
| sudahkah | |||
| sudahlah | |||
| supaya | |||
| tadi | |||
| tadinya | |||
| tak | |||
| tanpa | |||
| setelah | |||
| telah | |||
| tentang | |||
| tentu | |||
| tentulah | |||
| tentunya | |||
| tertentu | |||
| seterusnya | |||
| tapi | |||
| tetapi | |||
| setiap | |||
| tiap | |||
| setidaknya | |||
| tidak | |||
| tidakkah | |||
| tidaklah | |||
| toh | |||
| waduh | |||
| wah | |||
| wahai | |||
| sewaktu | |||
| walau | |||
| walaupun | |||
| wong | |||
| yaitu | |||
| yakni | |||
| yang | |||
| @@ -0,0 +1,303 @@ | |||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt | |||
| | This file is distributed under the BSD License. | |||
| | See http://snowball.tartarus.org/license.php | |||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||
| | - Encoding was converted to UTF-8. | |||
| | - This notice was added. | |||
| | | |||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| | An Italian stop word list. Comments begin with vertical bar. Each stop | |||
| | word is at the start of a line. | |||
| ad | a (to) before vowel | |||
| al | a + il | |||
| allo | a + lo | |||
| ai | a + i | |||
| agli | a + gli | |||
| all | a + l' | |||
| agl | a + gl' | |||
| alla | a + la | |||
| alle | a + le | |||
| con | with | |||
| col | con + il | |||
| coi | con + i (forms collo, cogli etc are now very rare) | |||
| da | from | |||
| dal | da + il | |||
| dallo | da + lo | |||
| dai | da + i | |||
| dagli | da + gli | |||
| dall | da + l' | |||
| dagl | da + gl' | |||
| dalla | da + la | |||
| dalle | da + le | |||
| di | of | |||
| del | di + il | |||
| dello | di + lo | |||
| dei | di + i | |||
| degli | di + gli | |||
| dell | di + l' | |||
| degl | di + gl' | |||
| della | di + la | |||
| delle | di + le | |||
| in | in | |||
| nel | in + il | |||
| nello | in + lo | |||
| nei | in + i | |||
| negli | in + gli | |||
| nell | in + l' | |||
| negl | in + gl' | |||
| nella | in + la | |||
| nelle | in + le | |||
| su | on | |||
| sul | su + il | |||
| sullo | su + lo | |||
| sui | su + i | |||
| sugli | su + gli | |||
| sull | su + l' | |||
| sugl | su + gl' | |||
| sulla | su + la | |||
| sulle | su + le | |||
| per | through, by | |||
| tra | among | |||
| contro | against | |||
| io | I | |||
| tu | thou | |||
| lui | he | |||
| lei | she | |||
| noi | we | |||
| voi | you | |||
| loro | they | |||
| mio | my | |||
| mia | | |||
| miei | | |||
| mie | | |||
| tuo | | |||
| tua | | |||
| tuoi | thy | |||
| tue | | |||
| suo | | |||
| sua | | |||
| suoi | his, her | |||
| sue | | |||
| nostro | our | |||
| nostra | | |||
| nostri | | |||
| nostre | | |||
| vostro | your | |||
| vostra | | |||
| vostri | | |||
| vostre | | |||
| mi | me | |||
| ti | thee | |||
| ci | us, there | |||
| vi | you, there | |||
| lo | him, the | |||
| la | her, the | |||
| li | them | |||
| le | them, the | |||
| gli | to him, the | |||
| ne | from there etc | |||
| il | the | |||
| un | a | |||
| uno | a | |||
| una | a | |||
| ma | but | |||
| ed | and | |||
| se | if | |||
| perché | why, because | |||
| anche | also | |||
| come | how | |||
| dov | where (as dov') | |||
| dove | where | |||
| che | who, that | |||
| chi | who | |||
| cui | whom | |||
| non | not | |||
| più | more | |||
| quale | who, that | |||
| quanto | how much | |||
| quanti | | |||
| quanta | | |||
| quante | | |||
| quello | that | |||
| quelli | | |||
| quella | | |||
| quelle | | |||
| questo | this | |||
| questi | | |||
| questa | | |||
| queste | | |||
| si | yes | |||
| tutto | all | |||
| tutti | all | |||
| | single letter forms: | |||
| a | at | |||
| c | as c' for ce or ci | |||
| e | and | |||
| i | the | |||
| l | as l' | |||
| o | or | |||
| | forms of avere, to have (not including the infinitive): | |||
| ho | |||
| hai | |||
| ha | |||
| abbiamo | |||
| avete | |||
| hanno | |||
| abbia | |||
| abbiate | |||
| abbiano | |||
| avrò | |||
| avrai | |||
| avrà | |||
| avremo | |||
| avrete | |||
| avranno | |||
| avrei | |||
| avresti | |||
| avrebbe | |||
| avremmo | |||
| avreste | |||
| avrebbero | |||
| avevo | |||
| avevi | |||
| aveva | |||
| avevamo | |||
| avevate | |||
| avevano | |||
| ebbi | |||
| avesti | |||
| ebbe | |||
| avemmo | |||
| aveste | |||
| ebbero | |||
| avessi | |||
| avesse | |||
| avessimo | |||
| avessero | |||
| avendo | |||
| avuto | |||
| avuta | |||
| avuti | |||
| avute | |||
| | forms of essere, to be (not including the infinitive): | |||
| sono | |||
| sei | |||
| è | |||
| siamo | |||
| siete | |||
| sia | |||
| siate | |||
| siano | |||
| sarò | |||
| sarai | |||
| sarà | |||
| saremo | |||
| sarete | |||
| saranno | |||
| sarei | |||
| saresti | |||
| sarebbe | |||
| saremmo | |||
| sareste | |||
| sarebbero | |||
| ero | |||
| eri | |||
| era | |||
| eravamo | |||
| eravate | |||
| erano | |||
| fui | |||
| fosti | |||
| fu | |||
| fummo | |||
| foste | |||
| furono | |||
| fossi | |||
| fosse | |||
| fossimo | |||
| fossero | |||
| essendo | |||
| | forms of fare, to do (not including the infinitive, fa, fat-): | |||
| faccio | |||
| fai | |||
| facciamo | |||
| fanno | |||
| faccia | |||
| facciate | |||
| facciano | |||
| farò | |||
| farai | |||
| farà | |||
| faremo | |||
| farete | |||
| faranno | |||
| farei | |||
| faresti | |||
| farebbe | |||
| faremmo | |||
| fareste | |||
| farebbero | |||
| facevo | |||
| facevi | |||
| faceva | |||
| facevamo | |||
| facevate | |||
| facevano | |||
| feci | |||
| facesti | |||
| fece | |||
| facemmo | |||
| faceste | |||
| fecero | |||
| facessi | |||
| facesse | |||
| facessimo | |||
| facessero | |||
| facendo | |||
| | forms of stare, to be (not including the infinitive): | |||
| sto | |||
| stai | |||
| sta | |||
| stiamo | |||
| stanno | |||
| stia | |||
| stiate | |||
| stiano | |||
| starò | |||
| starai | |||
| starà | |||
| staremo | |||
| starete | |||
| staranno | |||
| starei | |||
| staresti | |||
| starebbe | |||
| staremmo | |||
| stareste | |||
| starebbero | |||
| stavo | |||
| stavi | |||
| stava | |||
| stavamo | |||
| stavate | |||
| stavano | |||
| stetti | |||
| stesti | |||
| stette | |||
| stemmo | |||
| steste | |||
| stettero | |||
| stessi | |||
| stesse | |||
| stessimo | |||
| stessero | |||
| stando | |||
| @@ -0,0 +1,127 @@ | |||
| # | |||
| # This file defines a stopword set for Japanese. | |||
| # | |||
| # This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. | |||
| # Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 | |||
| # for frequency lists, etc. that can be useful for making your own set (if desired) | |||
| # | |||
| # Note that there is an overlap between these stopwords and the terms stopped when used | |||
| # in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note | |||
| # that comments are not allowed on the same line as stopwords. | |||
| # | |||
| # Also note that stopping is done in a case-insensitive manner. Change your StopFilter | |||
| # configuration if you need case-sensitive stopping. Lastly, note that stopping is done | |||
| # using the same character width as the entries in this file. Since this StopFilter is | |||
| # normally done after a CJKWidthFilter in your chain, you would usually want your romaji | |||
| # entries to be in half-width and your kana entries to be in full-width. | |||
| # | |||
| の | |||
| に | |||
| は | |||
| を | |||
| た | |||
| が | |||
| で | |||
| て | |||
| と | |||
| し | |||
| れ | |||
| さ | |||
| ある | |||
| いる | |||
| も | |||
| する | |||
| から | |||
| な | |||
| こと | |||
| として | |||
| い | |||
| や | |||
| れる | |||
| など | |||
| なっ | |||
| ない | |||
| この | |||
| ため | |||
| その | |||
| あっ | |||
| よう | |||
| また | |||
| もの | |||
| という | |||
| あり | |||
| まで | |||
| られ | |||
| なる | |||
| へ | |||
| か | |||
| だ | |||
| これ | |||
| によって | |||
| により | |||
| おり | |||
| より | |||
| による | |||
| ず | |||
| なり | |||
| られる | |||
| において | |||
| ば | |||
| なかっ | |||
| なく | |||
| しかし | |||
| について | |||
| せ | |||
| だっ | |||
| その後 | |||
| できる | |||
| それ | |||
| う | |||
| ので | |||
| なお | |||
| のみ | |||
| でき | |||
| き | |||
| つ | |||
| における | |||
| および | |||
| いう | |||
| さらに | |||
| でも | |||
| ら | |||
| たり | |||
| その他 | |||
| に関する | |||
| たち | |||
| ます | |||
| ん | |||
| なら | |||
| に対して | |||
| 特に | |||
| せる | |||
| 及び | |||
| これら | |||
| とき | |||
| では | |||
| にて | |||
| ほか | |||
| ながら | |||
| うち | |||
| そして | |||
| とともに | |||
| ただし | |||
| かつて | |||
| それぞれ | |||
| または | |||
| お | |||
| ほど | |||
| ものの | |||
| に対する | |||
| ほとんど | |||
| と共に | |||
| といった | |||
| です | |||
| とも | |||
| ところ | |||
| ここ | |||
| ##### End of file | |||
| @@ -0,0 +1,172 @@ | |||
| # Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins | |||
| # the original list of over 800 forms was refined: | |||
| # pronouns, adverbs, interjections were removed | |||
| # | |||
| # prepositions | |||
| aiz | |||
| ap | |||
| ar | |||
| apakš | |||
| ārpus | |||
| augšpus | |||
| bez | |||
| caur | |||
| dēļ | |||
| gar | |||
| iekš | |||
| iz | |||
| kopš | |||
| labad | |||
| lejpus | |||
| līdz | |||
| no | |||
| otrpus | |||
| pa | |||
| par | |||
| pār | |||
| pēc | |||
| pie | |||
| pirms | |||
| pret | |||
| priekš | |||
| starp | |||
| šaipus | |||
| uz | |||
| viņpus | |||
| virs | |||
| virspus | |||
| zem | |||
| apakšpus | |||
| # Conjunctions | |||
| un | |||
| bet | |||
| jo | |||
| ja | |||
| ka | |||
| lai | |||
| tomēr | |||
| tikko | |||
| turpretī | |||
| arī | |||
| kaut | |||
| gan | |||
| tādēļ | |||
| tā | |||
| ne | |||
| tikvien | |||
| vien | |||
| kā | |||
| ir | |||
| te | |||
| vai | |||
| kamēr | |||
| # Particles | |||
| ar | |||
| diezin | |||
| droši | |||
| diemžēl | |||
| nebūt | |||
| ik | |||
| it | |||
| taču | |||
| nu | |||
| pat | |||
| tiklab | |||
| iekšpus | |||
| nedz | |||
| tik | |||
| nevis | |||
| turpretim | |||
| jeb | |||
| iekam | |||
| iekām | |||
| iekāms | |||
| kolīdz | |||
| līdzko | |||
| tiklīdz | |||
| jebšu | |||
| tālab | |||
| tāpēc | |||
| nekā | |||
| itin | |||
| jā | |||
| jau | |||
| jel | |||
| nē | |||
| nezin | |||
| tad | |||
| tikai | |||
| vis | |||
| tak | |||
| iekams | |||
| vien | |||
| # modal verbs | |||
| būt | |||
| biju | |||
| biji | |||
| bija | |||
| bijām | |||
| bijāt | |||
| esmu | |||
| esi | |||
| esam | |||
| esat | |||
| būšu | |||
| būsi | |||
| būs | |||
| būsim | |||
| būsiet | |||
| tikt | |||
| tiku | |||
| tiki | |||
| tika | |||
| tikām | |||
| tikāt | |||
| tieku | |||
| tiec | |||
| tiek | |||
| tiekam | |||
| tiekat | |||
| tikšu | |||
| tiks | |||
| tiksim | |||
| tiksiet | |||
| tapt | |||
| tapi | |||
| tapāt | |||
| topat | |||
| tapšu | |||
| tapsi | |||
| taps | |||
| tapsim | |||
| tapsiet | |||
| kļūt | |||
| kļuvu | |||
| kļuvi | |||
| kļuva | |||
| kļuvām | |||
| kļuvāt | |||
| kļūstu | |||
| kļūsti | |||
| kļūst | |||
| kļūstam | |||
| kļūstat | |||
| kļūšu | |||
| kļūsi | |||
| kļūs | |||
| kļūsim | |||
| kļūsiet | |||
| # verbs | |||
| varēt | |||
| varēju | |||
| varējām | |||
| varēšu | |||
| varēsim | |||
| var | |||
| varēji | |||
| varējāt | |||
| varēsi | |||
| varēsiet | |||
| varat | |||
| varēja | |||
| varēs | |||
| @@ -0,0 +1,119 @@ | |||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt | |||
| | This file is distributed under the BSD License. | |||
| | See http://snowball.tartarus.org/license.php | |||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||
| | - Encoding was converted to UTF-8. | |||
| | - This notice was added. | |||
| | | |||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| | A Dutch stop word list. Comments begin with vertical bar. Each stop | |||
| | word is at the start of a line. | |||
| | This is a ranked list (commonest to rarest) of stopwords derived from | |||
| | a large sample of Dutch text. | |||
| | Dutch stop words frequently exhibit homonym clashes. These are indicated | |||
| | clearly below. | |||
| de | the | |||
| en | and | |||
| van | of, from | |||
| ik | I, the ego | |||
| te | (1) chez, at etc, (2) to, (3) too | |||
| dat | that, which | |||
| die | that, those, who, which | |||
| in | in, inside | |||
| een | a, an, one | |||
| hij | he | |||
| het | the, it | |||
| niet | not, nothing, naught | |||
| zijn | (1) to be, being, (2) his, one's, its | |||
| is | is | |||
| was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river | |||
| op | on, upon, at, in, up, used up | |||
| aan | on, upon, to (as dative) | |||
| met | with, by | |||
| als | like, such as, when | |||
| voor | (1) before, in front of, (2) furrow | |||
| had | had, past tense all persons sing. of 'hebben' (have) | |||
| er | there | |||
| maar | but, only | |||
| om | round, about, for etc | |||
| hem | him | |||
| dan | then | |||
| zou | should/would, past tense all persons sing. of 'zullen' | |||
| of | or, whether, if | |||
| wat | what, something, anything | |||
| mijn | possessive and noun 'mine' | |||
| men | people, 'one' | |||
| dit | this | |||
| zo | so, thus, in this way | |||
| door | through, by | |||
| over | over, across | |||
| ze | she, her, they, them | |||
| zich | oneself | |||
| bij | (1) a bee, (2) by, near, at | |||
| ook | also, too | |||
| tot | till, until | |||
| je | you | |||
| mij | me | |||
| uit | out of, from | |||
| der | Old Dutch form of 'van der' still found in surnames | |||
| daar | (1) there, (2) because | |||
| haar | (1) her, their, them, (2) hair | |||
| naar | (1) unpleasant, unwell etc, (2) towards, (3) as | |||
| heb | present first person sing. of 'to have' | |||
| hoe | how, why | |||
| heeft | present third person sing. of 'to have' | |||
| hebben | 'to have' and various parts thereof | |||
| deze | this | |||
| u | you | |||
| want | (1) for, (2) mitten, (3) rigging | |||
| nog | yet, still | |||
| zal | 'shall', first and third person sing. of verb 'zullen' (will) | |||
| me | me | |||
| zij | she, they | |||
| nu | now | |||
| ge | 'thou', still used in Belgium and south Netherlands | |||
| geen | none | |||
| omdat | because | |||
| iets | something, somewhat | |||
| worden | to become, grow, get | |||
| toch | yet, still | |||
| al | all, every, each | |||
| waren | (1) 'were', (2) to wander, (3) wares | |||
| veel | much, many | |||
| meer | (1) more, (2) lake | |||
| doen | to do, to make | |||
| toen | then, when | |||
| moet | noun 'spot/mote' and present form of 'to must' | |||
| ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' | |||
| zonder | without | |||
| kan | noun 'can' and present form of 'to be able' | |||
| hun | their, them | |||
| dus | so, consequently | |||
| alles | all, everything, anything | |||
| onder | under, beneath | |||
| ja | yes, of course | |||
| eens | once, one day | |||
| hier | here | |||
| wie | who | |||
| werd | imperfect third person sing. of 'become' | |||
| altijd | always | |||
| doch | yet, but etc | |||
| wordt | present third person sing. of 'become' | |||
| wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans | |||
| kunnen | to be able | |||
| ons | us/our | |||
| zelf | self | |||
| tegen | against, towards, at | |||
| na | after, near | |||
| reeds | already | |||
| wil | (1) present tense of 'want', (2) 'will', noun, (3) fender | |||
| kon | could; past tense of 'to be able' | |||
| niets | nothing | |||
| uw | your | |||
| iemand | somebody | |||
| geweest | been; past participle of 'be' | |||
| andere | other | |||
| @@ -0,0 +1,194 @@ | |||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt | |||
| | This file is distributed under the BSD License. | |||
| | See http://snowball.tartarus.org/license.php | |||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||
| | - Encoding was converted to UTF-8. | |||
| | - This notice was added. | |||
| | | |||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| | A Norwegian stop word list. Comments begin with vertical bar. Each stop | |||
| | word is at the start of a line. | |||
| | This stop word list is for the dominant bokmål dialect. Words unique | |||
| | to nynorsk are marked *. | |||
| | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005 | |||
| og | and | |||
| i | in | |||
| jeg | I | |||
| det | it/this/that | |||
| at | to (w. inf.) | |||
| en | a/an | |||
| et | a/an | |||
| den | it/this/that | |||
| til | to | |||
| er | is/am/are | |||
| som | who/that | |||
| på | on | |||
| de | they / you(formal) | |||
| med | with | |||
| han | he | |||
| av | of | |||
| ikke | not | |||
| ikkje | not * | |||
| der | there | |||
| så | so | |||
| var | was/were | |||
| meg | me | |||
| seg | you | |||
| men | but | |||
| ett | one | |||
| har | have | |||
| om | about | |||
| vi | we | |||
| min | my | |||
| mitt | my | |||
| ha | have | |||
| hadde | had | |||
| hun | she | |||
| nå | now | |||
| over | over | |||
| da | when/as | |||
| ved | by/know | |||
| fra | from | |||
| du | you | |||
| ut | out | |||
| sin | your | |||
| dem | them | |||
| oss | us | |||
| opp | up | |||
| man | you/one | |||
| kan | can | |||
| hans | his | |||
| hvor | where | |||
| eller | or | |||
| hva | what | |||
| skal | shall/must | |||
| selv | self (reflective) | |||
| sjøl | self (reflective) | |||
| her | here | |||
| alle | all | |||
| vil | will | |||
| bli | become | |||
| ble | became | |||
| blei | became * | |||
| blitt | have become | |||
| kunne | could | |||
| inn | in | |||
| når | when | |||
| være | be | |||
| kom | come | |||
| noen | some | |||
| noe | some | |||
| ville | would | |||
| dere | you | |||
| som | who/which/that | |||
| deres | their/theirs | |||
| kun | only/just | |||
| ja | yes | |||
| etter | after | |||
| ned | down | |||
| skulle | should | |||
| denne | this | |||
| for | for/because | |||
| deg | you | |||
| si | hers/his | |||
| sine | hers/his | |||
| sitt | hers/his | |||
| mot | against | |||
| å | to | |||
| meget | much | |||
| hvorfor | why | |||
| dette | this | |||
| disse | these/those | |||
| uten | without | |||
| hvordan | how | |||
| ingen | none | |||
| din | your | |||
| ditt | your | |||
| blir | become | |||
| samme | same | |||
| hvilken | which | |||
| hvilke | which (plural) | |||
| sånn | such a | |||
| inni | inside/within | |||
| mellom | between | |||
| vår | our | |||
| hver | each | |||
| hvem | who | |||
| vors | us/ours | |||
| hvis | whose | |||
| både | both | |||
| bare | only/just | |||
| enn | than | |||
| fordi | as/because | |||
| før | before | |||
| mange | many | |||
| også | also | |||
| slik | just | |||
| vært | been | |||
| være | to be | |||
| båe | both * | |||
| begge | both | |||
| siden | since | |||
| dykk | your * | |||
| dykkar | yours * | |||
| dei | they * | |||
| deira | them * | |||
| deires | theirs * | |||
| deim | them * | |||
| di | your (fem.) * | |||
| då | as/when * | |||
| eg | I * | |||
| ein | a/an * | |||
| eit | a/an * | |||
| eitt | a/an * | |||
| elles | or * | |||
| honom | he * | |||
| hjå | at * | |||
| ho | she * | |||
| hoe | she * | |||
| henne | her | |||
| hennar | her/hers | |||
| hennes | hers | |||
| hoss | how * | |||
| hossen | how * | |||
| ikkje | not * | |||
| ingi | noone * | |||
| inkje | noone * | |||
| korleis | how * | |||
| korso | how * | |||
| kva | what/which * | |||
| kvar | where * | |||
| kvarhelst | where * | |||
| kven | who/whom * | |||
| kvi | why * | |||
| kvifor | why * | |||
| me | we * | |||
| medan | while * | |||
| mi | my * | |||
| mine | my * | |||
| mykje | much * | |||
| no | now * | |||
| nokon | some (masc./neut.) * | |||
| noka | some (fem.) * | |||
| nokor | some * | |||
| noko | some * | |||
| nokre | some * | |||
| si | his/hers * | |||
| sia | since * | |||
| sidan | since * | |||
| so | so * | |||
| somt | some * | |||
| somme | some * | |||
| um | about* | |||
| upp | up * | |||
| vere | be * | |||
| vore | was * | |||
| verte | become * | |||
| vort | become * | |||
| varte | became * | |||
| vart | became * | |||
| @@ -0,0 +1,253 @@ | |||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt | |||
| | This file is distributed under the BSD License. | |||
| | See http://snowball.tartarus.org/license.php | |||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||
| | - Encoding was converted to UTF-8. | |||
| | - This notice was added. | |||
| | | |||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| | A Portuguese stop word list. Comments begin with vertical bar. Each stop | |||
| | word is at the start of a line. | |||
| | The following is a ranked list (commonest to rarest) of stopwords | |||
| | deriving from a large sample of text. | |||
| | Extra words have been added at the end. | |||
| de | of, from | |||
| a | the; to, at; her | |||
| o | the; him | |||
| que | who, that | |||
| e | and | |||
| do | de + o | |||
| da | de + a | |||
| em | in | |||
| um | a | |||
| para | for | |||
| | é from SER | |||
| com | with | |||
| não | not, no | |||
| uma | a | |||
| os | the; them | |||
| no | em + o | |||
| se | himself etc | |||
| na | em + a | |||
| por | for | |||
| mais | more | |||
| as | the; them | |||
| dos | de + os | |||
| como | as, like | |||
| mas | but | |||
| | foi from SER | |||
| ao | a + o | |||
| ele | he | |||
| das | de + as | |||
| | tem from TER | |||
| à | a + a | |||
| seu | his | |||
| sua | her | |||
| ou | or | |||
| | ser from SER | |||
| quando | when | |||
| muito | much | |||
| | há from HAV | |||
| nos | em + os; us | |||
| já | already, now | |||
| | está from EST | |||
| eu | I | |||
| também | also | |||
| só | only, just | |||
| pelo | per + o | |||
| pela | per + a | |||
| até | up to | |||
| isso | that | |||
| ela | she | |||
| entre | between | |||
| | era from SER | |||
| depois | after | |||
| sem | without | |||
| mesmo | same | |||
| aos | a + os | |||
| | ter from TER | |||
| seus | his | |||
| quem | whom | |||
| nas | em + as | |||
| me | me | |||
| esse | that | |||
| eles | they | |||
| | estão from EST | |||
| você | you | |||
| | tinha from TER | |||
| | foram from SER | |||
| essa | that | |||
| num | em + um | |||
| nem | nor | |||
| suas | her | |||
| meu | my | |||
| às | a + as | |||
| minha | my | |||
| | têm from TER | |||
| numa | em + uma | |||
| pelos | per + os | |||
| elas | they | |||
| | havia from HAV | |||
| | seja from SER | |||
| qual | which | |||
| | será from SER | |||
| nós | we | |||
| | tenho from TER | |||
| lhe | to him, her | |||
| deles | of them | |||
| essas | those | |||
| esses | those | |||
| pelas | per + as | |||
| este | this | |||
| | fosse from SER | |||
| dele | of him | |||
| | other words. There are many contractions such as naquele = em+aquele, | |||
| | mo = me+o, but they are rare. | |||
| | Indefinite article plural forms are also rare. | |||
| tu | thou | |||
| te | thee | |||
| vocês | you (plural) | |||
| vos | you | |||
| lhes | to them | |||
| meus | my | |||
| minhas | |||
| teu | thy | |||
| tua | |||
| teus | |||
| tuas | |||
| nosso | our | |||
| nossa | |||
| nossos | |||
| nossas | |||
| dela | of her | |||
| delas | of them | |||
| esta | this | |||
| estes | these | |||
| estas | these | |||
| aquele | that | |||
| aquela | that | |||
| aqueles | those | |||
| aquelas | those | |||
| isto | this | |||
| aquilo | that | |||
| | forms of estar, to be (not including the infinitive): | |||
| estou | |||
| está | |||
| estamos | |||
| estão | |||
| estive | |||
| esteve | |||
| estivemos | |||
| estiveram | |||
| estava | |||
| estávamos | |||
| estavam | |||
| estivera | |||
| estivéramos | |||
| esteja | |||
| estejamos | |||
| estejam | |||
| estivesse | |||
| estivéssemos | |||
| estivessem | |||
| estiver | |||
| estivermos | |||
| estiverem | |||
| | forms of haver, to have (not including the infinitive): | |||
| hei | |||
| há | |||
| havemos | |||
| hão | |||
| houve | |||
| houvemos | |||
| houveram | |||
| houvera | |||
| houvéramos | |||
| haja | |||
| hajamos | |||
| hajam | |||
| houvesse | |||
| houvéssemos | |||
| houvessem | |||
| houver | |||
| houvermos | |||
| houverem | |||
| houverei | |||
| houverá | |||
| houveremos | |||
| houverão | |||
| houveria | |||
| houveríamos | |||
| houveriam | |||
| | forms of ser, to be (not including the infinitive): | |||
| sou | |||
| somos | |||
| são | |||
| era | |||
| éramos | |||
| eram | |||
| fui | |||
| foi | |||
| fomos | |||
| foram | |||
| fora | |||
| fôramos | |||
| seja | |||
| sejamos | |||
| sejam | |||
| fosse | |||
| fôssemos | |||
| fossem | |||
| for | |||
| formos | |||
| forem | |||
| serei | |||
| será | |||
| seremos | |||
| serão | |||
| seria | |||
| seríamos | |||
| seriam | |||
| | forms of ter, to have (not including the infinitive): | |||
| tenho | |||
| tem | |||
| temos | |||
| tém | |||
| tinha | |||
| tínhamos | |||
| tinham | |||
| tive | |||
| teve | |||
| tivemos | |||
| tiveram | |||
| tivera | |||
| tivéramos | |||
| tenha | |||
| tenhamos | |||
| tenham | |||
| tivesse | |||
| tivéssemos | |||
| tivessem | |||
| tiver | |||
| tivermos | |||
| tiverem | |||
| terei | |||
| terá | |||
| teremos | |||
| terão | |||
| teria | |||
| teríamos | |||
| teriam | |||
| @@ -0,0 +1,233 @@ | |||
| # This file was created by Jacques Savoy and is distributed under the BSD license. | |||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | |||
| # Also see http://www.opensource.org/licenses/bsd-license.html | |||
| acea | |||
| aceasta | |||
| această | |||
| aceea | |||
| acei | |||
| aceia | |||
| acel | |||
| acela | |||
| acele | |||
| acelea | |||
| acest | |||
| acesta | |||
| aceste | |||
| acestea | |||
| aceşti | |||
| aceştia | |||
| acolo | |||
| acum | |||
| ai | |||
| aia | |||
| aibă | |||
| aici | |||
| al | |||
| ăla | |||
| ale | |||
| alea | |||
| ălea | |||
| altceva | |||
| altcineva | |||
| am | |||
| ar | |||
| are | |||
| aş | |||
| aşadar | |||
| asemenea | |||
| asta | |||
| ăsta | |||
| astăzi | |||
| astea | |||
| ăstea | |||
| ăştia | |||
| asupra | |||
| aţi | |||
| au | |||
| avea | |||
| avem | |||
| aveţi | |||
| azi | |||
| bine | |||
| bucur | |||
| bună | |||
| ca | |||
| că | |||
| căci | |||
| când | |||
| care | |||
| cărei | |||
| căror | |||
| cărui | |||
| cât | |||
| câte | |||
| câţi | |||
| către | |||
| câtva | |||
| ce | |||
| cel | |||
| ceva | |||
| chiar | |||
| cînd | |||
| cine | |||
| cineva | |||
| cît | |||
| cîte | |||
| cîţi | |||
| cîtva | |||
| contra | |||
| cu | |||
| cum | |||
| cumva | |||
| curând | |||
| curînd | |||
| da | |||
| dă | |||
| dacă | |||
| dar | |||
| datorită | |||
| de | |||
| deci | |||
| deja | |||
| deoarece | |||
| departe | |||
| deşi | |||
| din | |||
| dinaintea | |||
| dintr | |||
| dintre | |||
| drept | |||
| după | |||
| ea | |||
| ei | |||
| el | |||
| ele | |||
| eram | |||
| este | |||
| eşti | |||
| eu | |||
| face | |||
| fără | |||
| fi | |||
| fie | |||
| fiecare | |||
| fii | |||
| fim | |||
| fiţi | |||
| iar | |||
| ieri | |||
| îi | |||
| îl | |||
| îmi | |||
| împotriva | |||
| în | |||
| înainte | |||
| înaintea | |||
| încât | |||
| încît | |||
| încotro | |||
| între | |||
| întrucât | |||
| întrucît | |||
| îţi | |||
| la | |||
| lângă | |||
| le | |||
| li | |||
| lîngă | |||
| lor | |||
| lui | |||
| mă | |||
| mâine | |||
| mea | |||
| mei | |||
| mele | |||
| mereu | |||
| meu | |||
| mi | |||
| mine | |||
| mult | |||
| multă | |||
| mulţi | |||
| ne | |||
| nicăieri | |||
| nici | |||
| nimeni | |||
| nişte | |||
| noastră | |||
| noastre | |||
| noi | |||
| noştri | |||
| nostru | |||
| nu | |||
| ori | |||
| oricând | |||
| oricare | |||
| oricât | |||
| orice | |||
| oricînd | |||
| oricine | |||
| oricît | |||
| oricum | |||
| oriunde | |||
| până | |||
| pe | |||
| pentru | |||
| peste | |||
| pînă | |||
| poate | |||
| pot | |||
| prea | |||
| prima | |||
| primul | |||
| prin | |||
| printr | |||
| sa | |||
| să | |||
| săi | |||
| sale | |||
| sau | |||
| său | |||
| se | |||
| şi | |||
| sînt | |||
| sîntem | |||
| sînteţi | |||
| spre | |||
| sub | |||
| sunt | |||
| suntem | |||
| sunteţi | |||
| ta | |||
| tăi | |||
| tale | |||
| tău | |||
| te | |||
| ţi | |||
| ţie | |||
| tine | |||
| toată | |||
| toate | |||
| tot | |||
| toţi | |||
| totuşi | |||
| tu | |||
| un | |||
| una | |||
| unde | |||
| undeva | |||
| unei | |||
| unele | |||
| uneori | |||
| unor | |||
| vă | |||
| vi | |||
| voastră | |||
| voastre | |||
| voi | |||
| voştri | |||
| vostru | |||
| vouă | |||
| vreo | |||
| vreun | |||
| @@ -0,0 +1,243 @@ | |||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt | |||
| | This file is distributed under the BSD License. | |||
| | See http://snowball.tartarus.org/license.php | |||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||
| | - Encoding was converted to UTF-8. | |||
| | - This notice was added. | |||
| | | |||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| | A Russian stop word list. Comments begin with vertical bar. Each stop | |||
| | word is at the start of a line. | |||
| | This is a ranked list (commonest to rarest) of stopwords derived from | |||
| | a large text sample. | |||
| | letter `ё' is translated to `е'. | |||
| и | and | |||
| в | in/into | |||
| во | alternative form | |||
| не | not | |||
| что | what/that | |||
| он | he | |||
| на | on/onto | |||
| я | i | |||
| с | from | |||
| со | alternative form | |||
| как | how | |||
| а | milder form of `no' (but) | |||
| то | conjunction and form of `that' | |||
| все | all | |||
| она | she | |||
| так | so, thus | |||
| его | him | |||
| но | but | |||
| да | yes/and | |||
| ты | thou | |||
| к | towards, by | |||
| у | around, chez | |||
| же | intensifier particle | |||
| вы | you | |||
| за | beyond, behind | |||
| бы | conditional/subj. particle | |||
| по | up to, along | |||
| только | only | |||
| ее | her | |||
| мне | to me | |||
| было | it was | |||
| вот | here is/are, particle | |||
| от | away from | |||
| меня | me | |||
| еще | still, yet, more | |||
| нет | no, there isnt/arent | |||
| о | about | |||
| из | out of | |||
| ему | to him | |||
| теперь | now | |||
| когда | when | |||
| даже | even | |||
| ну | so, well | |||
| вдруг | suddenly | |||
| ли | interrogative particle | |||
| если | if | |||
| уже | already, but homonym of `narrower' | |||
| или | or | |||
| ни | neither | |||
| быть | to be | |||
| был | he was | |||
| него | prepositional form of его | |||
| до | up to | |||
| вас | you accusative | |||
| нибудь | indef. suffix preceded by hyphen | |||
| опять | again | |||
| уж | already, but homonym of `adder' | |||
| вам | to you | |||
| сказал | he said | |||
| ведь | particle `after all' | |||
| там | there | |||
| потом | then | |||
| себя | oneself | |||
| ничего | nothing | |||
| ей | to her | |||
| может | usually with `быть' as `maybe' | |||
| они | they | |||
| тут | here | |||
| где | where | |||
| есть | there is/are | |||
| надо | got to, must | |||
| ней | prepositional form of ей | |||
| для | for | |||
| мы | we | |||
| тебя | thee | |||
| их | them, their | |||
| чем | than | |||
| была | she was | |||
| сам | self | |||
| чтоб | in order to | |||
| без | without | |||
| будто | as if | |||
| человек | man, person, one | |||
| чего | genitive form of `what' | |||
| раз | once | |||
| тоже | also | |||
| себе | to oneself | |||
| под | beneath | |||
| жизнь | life | |||
| будет | will be | |||
| ж | short form of intensifier particle `же' | |||
| тогда | then | |||
| кто | who | |||
| этот | this | |||
| говорил | was saying | |||
| того | genitive form of `that' | |||
| потому | for that reason | |||
| этого | genitive form of `this' | |||
| какой | which | |||
| совсем | altogether | |||
| ним | prepositional form of `его', `они' | |||
| здесь | here | |||
| этом | prepositional form of `этот' | |||
| один | one | |||
| почти | almost | |||
| мой | my | |||
| тем | instrumental/dative plural of `тот', `то' | |||
| чтобы | full form of `in order that' | |||
| нее | her (acc.) | |||
| кажется | it seems | |||
| сейчас | now | |||
| были | they were | |||
| куда | where to | |||
| зачем | why | |||
| сказать | to say | |||
| всех | all (acc., gen. preposn. plural) | |||
| никогда | never | |||
| сегодня | today | |||
| можно | possible, one can | |||
| при | by | |||
| наконец | finally | |||
| два | two | |||
| об | alternative form of `о', about | |||
| другой | another | |||
| хоть | even | |||
| после | after | |||
| над | above | |||
| больше | more | |||
| тот | that one (masc.) | |||
| через | across, in | |||
| эти | these | |||
| нас | us | |||
| про | about | |||
| всего | in all, only, of all | |||
| них | prepositional form of `они' (they) | |||
| какая | which, feminine | |||
| много | lots | |||
| разве | interrogative particle | |||
| сказала | she said | |||
| три | three | |||
| эту | this, acc. fem. sing. | |||
| моя | my, feminine | |||
| впрочем | moreover, besides | |||
| хорошо | good | |||
| свою | ones own, acc. fem. sing. | |||
| этой | oblique form of `эта', fem. `this' | |||
| перед | in front of | |||
| иногда | sometimes | |||
| лучше | better | |||
| чуть | a little | |||
| том | preposn. form of `that one' | |||
| нельзя | one must not | |||
| такой | such a one | |||
| им | to them | |||
| более | more | |||
| всегда | always | |||
| конечно | of course | |||
| всю | acc. fem. sing of `all' | |||
| между | between | |||
| | b: some paradigms | |||
| | | |||
| | personal pronouns | |||
| | | |||
| | я меня мне мной [мною] | |||
| | ты тебя тебе тобой [тобою] | |||
| | он его ему им [него, нему, ним] | |||
| | она ее ей ею [нее, ней, нею] | |||
| | оно его ему им [него, нему, ним] | |||
| | | |||
| | мы нас нам нами | |||
| | вы вас вам вами | |||
| | они их им ими [них, ним, ними] | |||
| | | |||
| | себя себе собой [собою] | |||
| | | |||
| | demonstrative pronouns: этот (this), тот (that) | |||
| | | |||
| | этот эта это эти | |||
| | этого эты это эти | |||
| | этого этой этого этих | |||
| | этому этой этому этим | |||
| | этим этой этим [этою] этими | |||
| | этом этой этом этих | |||
| | | |||
| | тот та то те | |||
| | того ту то те | |||
| | того той того тех | |||
| | тому той тому тем | |||
| | тем той тем [тою] теми | |||
| | том той том тех | |||
| | | |||
| | determinative pronouns | |||
| | | |||
| | (a) весь (all) | |||
| | | |||
| | весь вся все все | |||
| | всего всю все все | |||
| | всего всей всего всех | |||
| | всему всей всему всем | |||
| | всем всей всем [всею] всеми | |||
| | всем всей всем всех | |||
| | | |||
| | (b) сам (himself etc) | |||
| | | |||
| | сам сама само сами | |||
| | самого саму само самих | |||
| | самого самой самого самих | |||
| | самому самой самому самим | |||
| | самим самой самим [самою] самими | |||
| | самом самой самом самих | |||
| | | |||
| | stems of verbs `to be', `to have', `to do' and modal | |||
| | | |||
| | быть бы буд быв есть суть | |||
| | име | |||
| | дел | |||
| | мог мож мочь | |||
| | уме | |||
| | хоч хот | |||
| | долж | |||
| | можн | |||
| | нужн | |||
| | нельзя | |||
| @@ -0,0 +1,133 @@ | |||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt | |||
| | This file is distributed under the BSD License. | |||
| | See http://snowball.tartarus.org/license.php | |||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||
| | - Encoding was converted to UTF-8. | |||
| | - This notice was added. | |||
| | | |||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| | A Swedish stop word list. Comments begin with vertical bar. Each stop | |||
| | word is at the start of a line. | |||
| | This is a ranked list (commonest to rarest) of stopwords derived from | |||
| | a large text sample. | |||
| | Swedish stop words occasionally exhibit homonym clashes. For example | |||
| | så = so, but also seed. These are indicated clearly below. | |||
| och | and | |||
| det | it, this/that | |||
| att | to (with infinitive) | |||
| i | in, at | |||
| en | a | |||
| jag | I | |||
| hon | she | |||
| som | who, that | |||
| han | he | |||
| på | on | |||
| den | it, this/that | |||
| med | with | |||
| var | where, each | |||
| sig | him(self) etc | |||
| för | for | |||
| så | so (also: seed) | |||
| till | to | |||
| är | is | |||
| men | but | |||
| ett | a | |||
| om | if; around, about | |||
| hade | had | |||
| de | they, these/those | |||
| av | of | |||
| icke | not, no | |||
| mig | me | |||
| du | you | |||
| henne | her | |||
| då | then, when | |||
| sin | his | |||
| nu | now | |||
| har | have | |||
| inte | inte någon = no one | |||
| hans | his | |||
| honom | him | |||
| skulle | 'sake' | |||
| hennes | her | |||
| där | there | |||
| min | my | |||
| man | one (pronoun) | |||
| ej | nor | |||
| vid | at, by, on (also: vast) | |||
| kunde | could | |||
| något | some etc | |||
| från | from, off | |||
| ut | out | |||
| när | when | |||
| efter | after, behind | |||
| upp | up | |||
| vi | we | |||
| dem | them | |||
| vara | be | |||
| vad | what | |||
| över | over | |||
| än | than | |||
| dig | you | |||
| kan | can | |||
| sina | his | |||
| här | here | |||
| ha | have | |||
| mot | towards | |||
| alla | all | |||
| under | under (also: wonder) | |||
| någon | some etc | |||
| eller | or (else) | |||
| allt | all | |||
| mycket | much | |||
| sedan | since | |||
| ju | why | |||
| denna | this/that | |||
| själv | myself, yourself etc | |||
| detta | this/that | |||
| åt | to | |||
| utan | without | |||
| varit | was | |||
| hur | how | |||
| ingen | no | |||
| mitt | my | |||
| ni | you | |||
| bli | to be, become | |||
| blev | from bli | |||
| oss | us | |||
| din | thy | |||
| dessa | these/those | |||
| några | some etc | |||
| deras | their | |||
| blir | from bli | |||
| mina | my | |||
| samma | (the) same | |||
| vilken | who, that | |||
| er | you, your | |||
| sådan | such a | |||
| vår | our | |||
| blivit | from bli | |||
| dess | its | |||
| inom | within | |||
| mellan | between | |||
| sådant | such a | |||
| varför | why | |||
| varje | each | |||
| vilka | who, that | |||
| ditt | thy | |||
| vem | who | |||
| vilket | who, that | |||
| sitta | his | |||
| sådana | such a | |||
| vart | each | |||
| dina | thy | |||
| vars | whose | |||
| vårt | our | |||
| våra | our | |||
| ert | your | |||
| era | your | |||
| vilkas | whose | |||
| @@ -0,0 +1,119 @@ | |||
| # Thai stopwords from: | |||
| # "Opinion Detection in Thai Political News Columns | |||
| # Based on Subjectivity Analysis" | |||
| # Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak | |||
| ไว้ | |||
| ไม่ | |||
| ไป | |||
| ได้ | |||
| ให้ | |||
| ใน | |||
| โดย | |||
| แห่ง | |||
| แล้ว | |||
| และ | |||
| แรก | |||
| แบบ | |||
| แต่ | |||
| เอง | |||
| เห็น | |||
| เลย | |||
| เริ่ม | |||
| เรา | |||
| เมื่อ | |||
| เพื่อ | |||
| เพราะ | |||
| เป็นการ | |||
| เป็น | |||
| เปิดเผย | |||
| เปิด | |||
| เนื่องจาก | |||
| เดียวกัน | |||
| เดียว | |||
| เช่น | |||
| เฉพาะ | |||
| เคย | |||
| เข้า | |||
| เขา | |||
| อีก | |||
| อาจ | |||
| อะไร | |||
| ออก | |||
| อย่าง | |||
| อยู่ | |||
| อยาก | |||
| หาก | |||
| หลาย | |||
| หลังจาก | |||
| หลัง | |||
| หรือ | |||
| หนึ่ง | |||
| ส่วน | |||
| ส่ง | |||
| สุด | |||
| สําหรับ | |||
| ว่า | |||
| วัน | |||
| ลง | |||
| ร่วม | |||
| ราย | |||
| รับ | |||
| ระหว่าง | |||
| รวม | |||
| ยัง | |||
| มี | |||
| มาก | |||
| มา | |||
| พร้อม | |||
| พบ | |||
| ผ่าน | |||
| ผล | |||
| บาง | |||
| น่า | |||
| นี้ | |||
| นํา | |||
| นั้น | |||
| นัก | |||
| นอกจาก | |||
| ทุก | |||
| ที่สุด | |||
| ที่ | |||
| ทําให้ | |||
| ทํา | |||
| ทาง | |||
| ทั้งนี้ | |||
| ทั้ง | |||
| ถ้า | |||
| ถูก | |||
| ถึง | |||
| ต้อง | |||
| ต่างๆ | |||
| ต่าง | |||
| ต่อ | |||
| ตาม | |||
| ตั้งแต่ | |||
| ตั้ง | |||
| ด้าน | |||
| ด้วย | |||
| ดัง | |||
| ซึ่ง | |||
| ช่วง | |||
| จึง | |||
| จาก | |||
| จัด | |||
| จะ | |||
| คือ | |||
| ความ | |||
| ครั้ง | |||
| คง | |||
| ขึ้น | |||
| ของ | |||
| ขอ | |||
| ขณะ | |||
| ก่อน | |||
| ก็ | |||
| การ | |||
| กับ | |||
| กัน | |||
| กว่า | |||
| กล่าว | |||
| @@ -0,0 +1,212 @@ | |||
| # Turkish stopwords from LUCENE-559 | |||
| # merged with the list from "Information Retrieval on Turkish Texts" | |||
| # (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) | |||
| acaba | |||
| altmış | |||
| altı | |||
| ama | |||
| ancak | |||
| arada | |||
| aslında | |||
| ayrıca | |||
| bana | |||
| bazı | |||
| belki | |||
| ben | |||
| benden | |||
| beni | |||
| benim | |||
| beri | |||
| beş | |||
| bile | |||
| bin | |||
| bir | |||
| birçok | |||
| biri | |||
| birkaç | |||
| birkez | |||
| birşey | |||
| birşeyi | |||
| biz | |||
| bize | |||
| bizden | |||
| bizi | |||
| bizim | |||
| böyle | |||
| böylece | |||
| bu | |||
| buna | |||
| bunda | |||
| bundan | |||
| bunlar | |||
| bunları | |||
| bunların | |||
| bunu | |||
| bunun | |||
| burada | |||
| çok | |||
| çünkü | |||
| da | |||
| daha | |||
| dahi | |||
| de | |||
| defa | |||
| değil | |||
| diğer | |||
| diye | |||
| doksan | |||
| dokuz | |||
| dolayı | |||
| dolayısıyla | |||
| dört | |||
| edecek | |||
| eden | |||
| ederek | |||
| edilecek | |||
| ediliyor | |||
| edilmesi | |||
| ediyor | |||
| eğer | |||
| elli | |||
| en | |||
| etmesi | |||
| etti | |||
| ettiği | |||
| ettiğini | |||
| gibi | |||
| göre | |||
| halen | |||
| hangi | |||
| hatta | |||
| hem | |||
| henüz | |||
| hep | |||
| hepsi | |||
| her | |||
| herhangi | |||
| herkesin | |||
| hiç | |||
| hiçbir | |||
| için | |||
| iki | |||
| ile | |||
| ilgili | |||
| ise | |||
| işte | |||
| itibaren | |||
| itibariyle | |||
| kadar | |||
| karşın | |||
| katrilyon | |||
| kendi | |||
| kendilerine | |||
| kendini | |||
| kendisi | |||
| kendisine | |||
| kendisini | |||
| kez | |||
| ki | |||
| kim | |||
| kimden | |||
| kime | |||
| kimi | |||
| kimse | |||
| kırk | |||
| milyar | |||
| milyon | |||
| mu | |||
| mü | |||
| mı | |||
| nasıl | |||
| ne | |||
| neden | |||
| nedenle | |||
| nerde | |||
| nerede | |||
| nereye | |||
| niye | |||
| niçin | |||
| o | |||
| olan | |||
| olarak | |||
| oldu | |||
| olduğu | |||
| olduğunu | |||
| olduklarını | |||
| olmadı | |||
| olmadığı | |||
| olmak | |||
| olması | |||
| olmayan | |||
| olmaz | |||
| olsa | |||
| olsun | |||
| olup | |||
| olur | |||
| olursa | |||
| oluyor | |||
| on | |||
| ona | |||
| ondan | |||
| onlar | |||
| onlardan | |||
| onları | |||
| onların | |||
| onu | |||
| onun | |||
| otuz | |||
| oysa | |||
| öyle | |||
| pek | |||
| rağmen | |||
| sadece | |||
| sanki | |||
| sekiz | |||
| seksen | |||
| sen | |||
| senden | |||
| seni | |||
| senin | |||
| siz | |||
| sizden | |||
| sizi | |||
| sizin | |||
| şey | |||
| şeyden | |||
| şeyi | |||
| şeyler | |||
| şöyle | |||
| şu | |||
| şuna | |||
| şunda | |||
| şundan | |||
| şunları | |||
| şunu | |||
| tarafından | |||
| trilyon | |||
| tüm | |||
| üç | |||
| üzere | |||
| var | |||
| vardı | |||
| ve | |||
| veya | |||
| ya | |||
| yani | |||
| yapacak | |||
| yapılan | |||
| yapılması | |||
| yapıyor | |||
| yapmak | |||
| yaptı | |||
| yaptığı | |||
| yaptığını | |||
| yaptıkları | |||
| yedi | |||
| yerine | |||
| yetmiş | |||
| yine | |||
| yirmi | |||
| yoksa | |||
| yüz | |||
| zaten | |||
| @@ -0,0 +1,29 @@ | |||
| # | |||
| # This is a sample user dictionary for Kuromoji (JapaneseTokenizer) | |||
| # | |||
| # Add entries to this file in order to override the statistical model in terms | |||
| # of segmentation, readings and part-of-speech tags. Notice that entries do | |||
| # not have weights since they are always used when found. This is by-design | |||
| # in order to maximize ease-of-use. | |||
| # | |||
| # Entries are defined using the following CSV format: | |||
| # <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> | |||
| # | |||
| # Notice that a single half-width space separates tokens and readings, and | |||
| # that the number of tokens and readings must match exactly. | |||
| # | |||
| # Also notice that the behavior of multiple entries with the same <text> is undefined. | |||
| # | |||
| # Whitespace only lines are ignored. Comments are not allowed on entry lines. | |||
| # | |||
| # Custom segmentation for kanji compounds | |||
| 日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 | |||
| 関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 | |||
| # Custom segmentation for compound katakana | |||
| トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 | |||
| ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 | |||
| # Custom reading for former sumo wrestler | |||
| 朝青龍,朝青龍,アサショウリュウ,カスタム人名 | |||
| @@ -0,0 +1,34 @@ | |||
| {"params":{ | |||
| "query":{ | |||
| "defType":"edismax", | |||
| "q.alt":"*:*", | |||
| "rows":"10", | |||
| "fl":"*,score", | |||
| "":{"v":0}}, | |||
| "facets":{ | |||
| "facet":"on", | |||
| "facet.mincount":"1", | |||
| "f.doc_type.facet.mincount":"0", | |||
| "facet.field":["text_shingles","{!ex=type}doc_type", "language"], | |||
| "f.text_shingles.facet.limit":10, | |||
| "facet.query":"{!ex=type key=all_types}*:*", | |||
| "f.doc_type.facet.missing":true, | |||
| "":{"v":0}}, | |||
| "browse":{ | |||
| "type_fq":"{!field f=doc_type v=$type}", | |||
| "hl":"on", | |||
| "hl.fl":"content", | |||
| "v.locale":"${locale}", | |||
| "debug":"true", | |||
| "hl.simple.pre":"HL_START", | |||
| "hl.simple.post":"HL_END", | |||
| "echoParams": "explicit", | |||
| "_appends_": { | |||
| "fq": "{!switch v=$type tag=type case='*:*' case.all='*:*' case.unknown='-doc_type:[* TO *]' default=$type_fq}" | |||
| }, | |||
| "":{"v":0}}, | |||
| "velocity":{ | |||
| "wt":"velocity", | |||
| "v.template":"browse", | |||
| "v.layout":"layout", | |||
| "":{"v":0}}}} | |||
| @@ -0,0 +1,21 @@ | |||
| # The ASF licenses this file to You under the Apache License, Version 2.0 | |||
| # (the "License"); you may not use this file except in compliance with | |||
| # the License. You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| #----------------------------------------------------------------------- | |||
| # Use a protected word file to protect against the stemmer reducing two | |||
| # unrelated words to the same base word. | |||
| # Some non-words that normally won't be encountered, | |||
| # just to test that they won't be stemmed. | |||
| dontstems | |||
| zwhacky | |||
| @@ -0,0 +1,530 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | |||
| <!-- Solr managed schema - automatically generated - DO NOT EDIT --> | |||
| <schema name="example-data-driven-schema" version="1.6"> | |||
| <uniqueKey>id</uniqueKey> | |||
| <fieldType name="ancestor_path" class="solr.TextField"> | |||
| <analyzer type="index"> | |||
| <tokenizer class="solr.KeywordTokenizerFactory"/> | |||
| </analyzer> | |||
| <analyzer type="query"> | |||
| <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="binary" class="solr.BinaryField"/> | |||
| <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> | |||
| <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/> | |||
| <fieldType name="currency" class="solr.CurrencyFieldType" amountLongSuffix="_l_ns" codeStrSuffix="_s_ns" defaultCurrency="USD" currencyConfig="currency.xml" /> | |||
| <fieldType name="descendent_path" class="solr.TextField"> | |||
| <analyzer type="index"> | |||
| <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/> | |||
| </analyzer> | |||
| <analyzer type="query"> | |||
| <tokenizer class="solr.KeywordTokenizerFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/> | |||
| <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/> | |||
| <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" geo="true" maxDistErr="0.001" distErrPct="0.025" distanceUnits="kilometers"/> | |||
| <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.KeywordTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="phonetic_en" class="solr.TextField" indexed="true" stored="false"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="pdate" class="solr.DatePointField" docValues="true"/> | |||
| <fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> | |||
| <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> | |||
| <fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> | |||
| <fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/> | |||
| <fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> | |||
| <fieldType name="pint" class="solr.IntPointField" docValues="true"/> | |||
| <fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> | |||
| <fieldType name="plong" class="solr.LongPointField" docValues="true"/> | |||
| <fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> | |||
| <fieldType name="point" class="solr.PointType" subFieldSuffix="_d" dimension="2"/> | |||
| <fieldType name="random" class="solr.RandomSortField" indexed="true"/> | |||
| <fieldType name="string" class="solr.StrField" sortMissingLast="true"/> | |||
| <fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true"/> | |||
| <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_ar.txt" ignoreCase="true"/> | |||
| <filter class="solr.ArabicNormalizationFilterFactory"/> | |||
| <filter class="solr.ArabicStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_bg.txt" ignoreCase="true"/> | |||
| <filter class="solr.BulgarianStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.ElisionFilterFactory" articles="lang/contractions_ca.txt" ignoreCase="true"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_ca.txt" ignoreCase="true"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.CJKWidthFilterFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.CJKBigramFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_cz.txt" ignoreCase="true"/> | |||
| <filter class="solr.CzechStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_da.txt" ignoreCase="true"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Danish"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_de.txt" ignoreCase="true"/> | |||
| <filter class="solr.GermanNormalizationFilterFactory"/> | |||
| <filter class="solr.GermanLightStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.GreekLowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_el.txt" ignoreCase="false"/> | |||
| <filter class="solr.GreekStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer type="index"> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.EnglishPossessiveFilterFactory"/> | |||
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||
| <filter class="solr.PorterStemFilterFactory"/> | |||
| </analyzer> | |||
| <analyzer type="query"> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.EnglishPossessiveFilterFactory"/> | |||
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||
| <filter class="solr.PorterStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_en_splitting" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100"> | |||
| <analyzer type="index"> | |||
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||
| <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||
| <filter class="solr.PorterStemFilterFactory"/> | |||
| <filter class="solr.FlattenGraphFilterFactory" /> | |||
| </analyzer> | |||
| <analyzer type="query"> | |||
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||
| <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||
| <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||
| <filter class="solr.PorterStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_en_splitting_tight" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100"> | |||
| <analyzer type="index"> | |||
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||
| <filter class="solr.SynonymGraphFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||
| <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||
| <filter class="solr.EnglishMinimalStemFilterFactory"/> | |||
| <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |||
| <filter class="solr.FlattenGraphFilterFactory" /> | |||
| </analyzer> | |||
| <analyzer type="query"> | |||
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||
| <filter class="solr.SynonymGraphFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||
| <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||
| <filter class="solr.EnglishMinimalStemFilterFactory"/> | |||
| <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_es.txt" ignoreCase="true"/> | |||
| <filter class="solr.SpanishLightStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_eu.txt" ignoreCase="true"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Basque"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <charFilter class="solr.PersianCharFilterFactory"/> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.ArabicNormalizationFilterFactory"/> | |||
| <filter class="solr.PersianNormalizationFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_fa.txt" ignoreCase="true"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fi.txt" ignoreCase="true"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.ElisionFilterFactory" articles="lang/contractions_fr.txt" ignoreCase="true"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fr.txt" ignoreCase="true"/> | |||
| <filter class="solr.FrenchLightStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.ElisionFilterFactory" articles="lang/contractions_ga.txt" ignoreCase="true"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/hyphenations_ga.txt" ignoreCase="true"/> | |||
| <filter class="solr.IrishLowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_ga.txt" ignoreCase="true"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Irish"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true"> | |||
| <analyzer type="index"> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| </analyzer> | |||
| <analyzer type="query"> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||
| <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer type="index"> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.ReversedWildcardFilterFactory" maxPosQuestion="2" maxFractionAsterisk="0.33" maxPosAsterisk="3" withOriginal="true"/> | |||
| </analyzer> | |||
| <analyzer type="query"> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||
| <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_gl.txt" ignoreCase="true"/> | |||
| <filter class="solr.GalicianStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.IndicNormalizationFilterFactory"/> | |||
| <filter class="solr.HindiNormalizationFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_hi.txt" ignoreCase="true"/> | |||
| <filter class="solr.HindiStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_hu.txt" ignoreCase="true"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_hy.txt" ignoreCase="true"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_id.txt" ignoreCase="true"/> | |||
| <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt" ignoreCase="true"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_it.txt" ignoreCase="true"/> | |||
| <filter class="solr.ItalianLightStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_ja" class="solr.TextField" autoGeneratePhraseQueries="false" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> | |||
| <filter class="solr.JapaneseBaseFormFilterFactory"/> | |||
| <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt"/> | |||
| <filter class="solr.CJKWidthFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_ja.txt" ignoreCase="true"/> | |||
| <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/> | |||
| <filter class="solr.KoreanPartOfSpeechStopFilterFactory" /> | |||
| <filter class="solr.KoreanReadingFormFilterFactory" /> | |||
| <filter class="solr.LowerCaseFilterFactory" /> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_lv.txt" ignoreCase="true"/> | |||
| <filter class="solr.LatvianStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_nl.txt" ignoreCase="true"/> | |||
| <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_no.txt" ignoreCase="true"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_pt.txt" ignoreCase="true"/> | |||
| <filter class="solr.PortugueseLightStemFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_ro.txt" ignoreCase="true"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_ru.txt" ignoreCase="true"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Russian"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_sv.txt" ignoreCase="true"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.ThaiTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_th.txt" ignoreCase="true"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <filter class="solr.TurkishLowerCaseFilterFactory"/> | |||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_tr.txt" ignoreCase="false"/> | |||
| <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_email_url" class="solr.TextField"> | |||
| <analyzer> | |||
| <tokenizer class="solr.UAX29URLEmailTokenizerFactory"/> | |||
| <filter class="solr.TypeTokenFilterFactory" types="email_url_types.txt" useWhitelist="true"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <fieldType name="text_shingles" class="solr.TextField" positionIncrementGap="100" multiValued="true"> | |||
| <analyzer type="index"> | |||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||
| <!-- <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="false" /> --> | |||
| <filter class="solr.LengthFilterFactory" min="2" max="18"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| <filter class="solr.PatternReplaceFilterFactory" pattern="(^[^a-z]+$)" replacement="" replace="all"/> | |||
| <filter class="solr.ShingleFilterFactory" minShingleSize="3" maxShingleSize="3" | |||
| outputUnigrams="false" outputUnigramsIfNoShingles="false" tokenSeparator=" " fillerToken="*"/> | |||
| <filter class="solr.PatternReplaceFilterFactory" pattern="(.*[\*].*)" replacement=""/> | |||
| <filter class="solr.TrimFilterFactory"/> | |||
| <!-- The PatternReplaceFilterFactory (PRFF) above may have reduced a token to an empty string; drop such tokens here --> | |||
| <filter class="solr.LengthFilterFactory" min="1" max="100"/> | |||
| </analyzer> | |||
| <analyzer type="query"> | |||
| <tokenizer class="solr.KeywordTokenizerFactory"/> | |||
| <filter class="solr.LowerCaseFilterFactory"/> | |||
| </analyzer> | |||
| </fieldType> | |||
| <field name="id" type="string" multiValued="false" indexed="true" required="true" stored="true"/> | |||
| <field name="_version_" type="plong" indexed="true" stored="true"/> | |||
| <field name="content_type" type="string" indexed="true" stored="true"/> | |||
| <field name="doc_type" type="string" indexed="true" stored="true"/> | |||
| <field name="title" type="string" indexed="true" stored="true"/> | |||
| <field name="language" type="string" indexed="true" stored="true"/> | |||
| <field name="content" type="text_general" multiValued="false" indexed="true" stored="true"/> | |||
| <field name="text_shingles" type="text_shingles" indexed="true" stored="false"/> | |||
| <field name="_text_" type="text_general" multiValued="true" indexed="true" stored="false"/> | |||
| <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/> | |||
| <dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/> | |||
| <dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/> | |||
| <dynamicField name="*_coordinate" type="pdouble" indexed="true" stored="false"/> | |||
| <dynamicField name="ignored_*" type="ignored" multiValued="true"/> | |||
| <dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/> | |||
| <dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/> | |||
| <dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_cjk" type="text_cjk" indexed="true" stored="true"/> | |||
| <dynamicField name="random_*" type="random"/> | |||
| <dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_ar" type="text_ar" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_bg" type="text_bg" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_ca" type="text_ca" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_cz" type="text_cz" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_da" type="text_da" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_de" type="text_de" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_el" type="text_el" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_es" type="text_es" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_eu" type="text_eu" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_fa" type="text_fa" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_fi" type="text_fi" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_fr" type="text_fr" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_ga" type="text_ga" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_gl" type="text_gl" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_hi" type="text_hi" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_hu" type="text_hu" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_hy" type="text_hy" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_id" type="text_id" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_it" type="text_it" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_ja" type="text_ja" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_nl" type="text_nl" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_no" type="text_no" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_ro" type="text_ro" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_ru" type="text_ru" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_sv" type="text_sv" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_th" type="text_th" indexed="true" stored="true"/> | |||
| <dynamicField name="*_txt_tr" type="text_tr" indexed="true" stored="true"/> | |||
| <dynamicField name="*_point" type="point" indexed="true" stored="true"/> | |||
| <dynamicField name="*_srpt" type="location_rpt" indexed="true" stored="true"/> | |||
| <dynamicField name="attr_*" type="text_general" multiValued="true" indexed="true" stored="true"/> | |||
| <dynamicField name="*_l_ns" type="plong" indexed="true" stored="false"/> | |||
| <dynamicField name="*_s_ns" type="string" indexed="true" stored="false"/> | |||
| <dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/> | |||
| <dynamicField name="*_dts" type="pdate" multiValued="true" indexed="true" stored="true"/> | |||
| <dynamicField name="*_is" type="pints" indexed="true" stored="true"/> | |||
| <dynamicField name="*_ss" type="strings" indexed="true" stored="true"/> | |||
| <dynamicField name="*_ls" type="plongs" indexed="true" stored="true"/> | |||
| <dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/> | |||
| <dynamicField name="*_fs" type="pfloats" indexed="true" stored="true"/> | |||
| <dynamicField name="*_ds" type="pdoubles" indexed="true" stored="true"/> | |||
| <dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/> | |||
| <dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/> | |||
| <dynamicField name="*_i" type="pint" indexed="true" stored="true"/> | |||
| <dynamicField name="*_s" type="string" indexed="true" stored="true"/> | |||
| <dynamicField name="*_l" type="plong" indexed="true" stored="true"/> | |||
| <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/> | |||
| <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> | |||
| <dynamicField name="*_f" type="pfloat" indexed="true" stored="true"/> | |||
| <dynamicField name="*_d" type="pdouble" indexed="true" stored="true"/> | |||
| <dynamicField name="*_p" type="location" indexed="true" stored="true"/> | |||
| <dynamicField name="*_c" type="currency" indexed="true" stored="true"/> | |||
| <copyField source="content" dest="text_shingles"/> | |||
| <copyField source="*" dest="_text_"/> | |||
| <!-- ADDED BY SIMON BOWIE 2022-04-04 --> | |||
| <!-- Derives a "year" field from the document body: the "content" field is copied into "year", and the analyzer of the "year" field type below keeps only the text captured by its regular expression. --> | |||
| <!-- NOTE(review): analysis only shapes the indexed terms; the stored value of a copyField destination is normally the unmodified source text. Confirm that consumers of "year" get the four-digit term and not the whole content. --> | |||
| <copyField source="content" dest="year"/> | |||
| <field name="year" type="year" indexed="true" stored="true"/> | |||
| <fieldType name="year" class="solr.TextField" positionIncrementGap="100"> | |||
| <analyzer> | |||
| <!-- The pattern matches the literal "=D" plus any non-whitespace run, then three more non-whitespace runs each preceded by one whitespace character, then one whitespace character and four digits. group="1" selects the four captured digits as the token. --> | |||
| <!-- NOTE(review): presumably this targets a date line inside the indexed text; verify against sample documents. --> | |||
| <tokenizer class="solr.PatternTokenizerFactory" pattern="=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})" group="1" /> | |||
| </analyzer> | |||
| </fieldType> | |||
| <!-- END --> | |||
| </schema> | |||
| @@ -0,0 +1,14 @@ | |||
| # Licensed to the Apache Software Foundation (ASF) under one or more | |||
| # contributor license agreements. See the NOTICE file distributed with | |||
| # this work for additional information regarding copyright ownership. | |||
| # The ASF licenses this file to You under the Apache License, Version 2.0 | |||
| # (the "License"); you may not use this file except in compliance with | |||
| # the License. You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| @@ -0,0 +1,29 @@ | |||
| # The ASF licenses this file to You under the Apache License, Version 2.0 | |||
| # (the "License"); you may not use this file except in compliance with | |||
| # the License. You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| #----------------------------------------------------------------------- | |||
| #some test synonym mappings unlikely to appear in real input text | |||
| aaafoo => aaabar | |||
| bbbfoo => bbbfoo bbbbar | |||
| cccfoo => cccbar cccbaz | |||
| fooaaa,baraaa,bazaaa | |||
| # Some synonym groups specific to this example | |||
| GB,gib,gigabyte,gigabytes | |||
| MB,mib,megabyte,megabytes | |||
| Television, Televisions, TV, TVs | |||
| #notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming | |||
| #after us won't split it into two words. | |||
| # Synonym mappings can be used for spelling correction too | |||
| pixima => pixma | |||
| @@ -0,0 +1,115 @@ | |||
| function get_class(name) { | |||
| var clazz; | |||
| try { | |||
| // Java8 Nashorn | |||
| clazz = eval("Java.type(name).class"); | |||
| } catch(e) { | |||
| // Java7 Rhino | |||
| clazz = eval("Packages."+name); | |||
| } | |||
| return clazz; | |||
| } | |||
| function processAdd(cmd) { | |||
| doc = cmd.solrDoc; // org.apache.solr.common.SolrInputDocument | |||
| var id = doc.getFieldValue("id"); | |||
| logger.info("update-script#processAdd: id=" + id); | |||
| // The idea here is to use the file's content_type value to | |||
| // simplify into user-friendly values, such that types of, say, image/jpeg and image/tiff | |||
| // are in an "Images" facet | |||
| var ct = doc.getFieldValue("content_type"); | |||
| if (ct) { | |||
| // strip off semicolon onward | |||
| var semicolon_index = ct.indexOf(';'); | |||
| if (semicolon_index != -1) { | |||
| ct = ct.substring(0,semicolon_index); | |||
| } | |||
| // and split type/subtype | |||
| var ct_type = ct.substring(0,ct.indexOf('/')); | |||
| var ct_subtype = ct.substring(ct.indexOf('/')+1); | |||
| var doc_type; | |||
| switch(true) { | |||
| case /^application\/rtf/.test(ct) || /wordprocessing/.test(ct): | |||
| doc_type = "doc"; | |||
| break; | |||
| case /html/.test(ct): | |||
| doc_type = "html"; | |||
| break; | |||
| case /^image\/.*/.test(ct): | |||
| doc_type = "image"; | |||
| break; | |||
| case /presentation|powerpoint/.test(ct): | |||
| doc_type = "presentation"; | |||
| break; | |||
| case /spreadsheet|excel/.test(ct): | |||
| doc_type = "spreadsheet"; | |||
| break; | |||
| case /^application\/pdf/.test(ct): | |||
| doc_type = "pdf"; | |||
| break; | |||
| case /^text\/plain/.test(ct): | |||
| doc_type = "text" | |||
| break; | |||
| default: | |||
| break; | |||
| } | |||
| // TODO: error handling needed? What if there is no slash? | |||
| if(doc_type) { doc.setField("doc_type", doc_type); } | |||
| doc.setField("content_type_type_s", ct_type); | |||
| doc.setField("content_type_subtype_s", ct_subtype); | |||
| } | |||
| var content = doc.getFieldValue("content"); | |||
| if (!content) { | |||
| return; //No content found, so we are done here | |||
| } | |||
| var analyzer = | |||
| req.getCore().getLatestSchema() | |||
| .getFieldTypeByName("text_email_url") | |||
| .getIndexAnalyzer(); | |||
| var token_stream = | |||
| analyzer.tokenStream("content", content); | |||
| var term_att = token_stream.getAttribute(get_class("org.apache.lucene.analysis.tokenattributes.CharTermAttribute")); | |||
| var type_att = token_stream.getAttribute(get_class("org.apache.lucene.analysis.tokenattributes.TypeAttribute")); | |||
| token_stream.reset(); | |||
| while (token_stream.incrementToken()) { | |||
| doc.addField(type_att.type().replace(/\<|\>/g,'').toLowerCase()+"_ss", term_att.toString()); | |||
| } | |||
| token_stream.end(); | |||
| token_stream.close(); | |||
| } | |||
| // The remaining functions take their command argument (finish takes none) and intentionally do nothing. | |||
| // NOTE(review): presumably the script update processor expects these hooks to be defined; confirm before removing any. | |||
| function processDelete(cmd) { | |||
| // no-op | |||
| } | |||
| function processMergeIndexes(cmd) { | |||
| // no-op | |||
| } | |||
| function processCommit(cmd) { | |||
| // no-op | |||
| } | |||
| function processRollback(cmd) { | |||
| // no-op | |||
| } | |||
| function finish() { | |||
| // no-op | |||
| } | |||
| @@ -0,0 +1,32 @@ | |||
| ## Search form, the list of active filter queries, and the results container. | |||
| <div id="query-box"> | |||
| <form id="query-form" action="#{url_for_home}" method="GET"> | |||
| $resource.find: | |||
| <input type="text" id="q" name="q" style="width: 50%" value="$!esc.html($request.params.get('q'))"/> | |||
| <input type="submit" value="$resource.submit"/> | |||
| <div id="debug_query" class="debug"> | |||
| <span id="parsed_query">$esc.html($response.response.debug.parsedquery)</span> | |||
| </div> | |||
| <input type="hidden" name="type" value="#current_type"/> | |||
| ## The locale input needs a name to be submitted with the form; it previously carried two value attributes and no name. | |||
| #if("#current_locale"!="")<input type="hidden" name="locale" value="#current_locale"/>#end | |||
| ## Re-send every active filter query with the next search. | |||
| #foreach($fq in $response.responseHeader.params.getAll("fq")) | |||
| <input type="hidden" name="fq" id="allFQs" value="$esc.html($fq)"/> | |||
| #end | |||
| </form> | |||
| <div id="constraints"> | |||
| ## One entry per filter query; the x link keeps only the filters listed before it. | |||
| #foreach($fq in $response.responseHeader.params.getAll("fq")) | |||
| #set($previous_fq_count=$velocityCount - 1) | |||
| #if($fq != '') | |||
| ## The filter text comes from the request, so it is HTML-escaped like the hidden inputs above. | |||
| > $esc.html($fq)<a href="#url_for_filters($response.responseHeader.params.fq.subList(0,$previous_fq_count))">x</a> | |||
| #end | |||
| #end | |||
| </div> | |||
| </div> | |||
| <div id="browse_results"> | |||
| #parse("results.vm") | |||
| </div> | |||
| @@ -0,0 +1,2 @@ | |||
| ## intentionally empty | |||
| @@ -0,0 +1,12 @@ | |||
| ## Renders the values of one facet field as a tag cloud list, sorted by name. | |||
| ## Each item carries its count in data-weight and links to the matching facet filter. | |||
| <div id="facet_$field.name"> | |||
| <span class="facet-field">$resource.facet.top_phrases</span><br/> | |||
| <ul id="tagcloud"> | |||
| #foreach($facet in $sort.sort($field.values,"name")) | |||
| <li data-weight="$math.mul($facet.count,1)"> | |||
| ## Facet names are index data, so the visible label is HTML-escaped. | |||
| <a href="#url_for_facet_filter($field.name, $facet.name)">$esc.html($facet.name)</a> | |||
| </li> | |||
| #end | |||
| </ul> | |||
| </div> | |||
| @@ -0,0 +1,24 @@ | |||
| ## Renders every facet field that has values. A field-specific template named facet_<field name>.vm is used when it exists; otherwise a plain list of value links with counts is emitted. | |||
| #if($response.facetFields.size() > 0) | |||
| #foreach($field in $response.facetFields) | |||
| #if($field.values.size() > 0) | |||
| #if($engine.resourceExists("facet_${field.name}.vm")) | |||
| #parse("facet_${field.name}.vm") | |||
| #else | |||
| <div id="facet_$field.name" class="facet_field"> | |||
| <span class="facet-field">#label("facet.${field.name}",$field.name)</span><br/> | |||
| <ul> | |||
| #foreach($facet in $field.values) | |||
| <li><a href="#url_for_facet_filter($field.name, $facet.name)">#if($facet.name!=$null)#label("${field.name}.${facet.name}","${field.name}.${facet.name}")#else<em>missing</em>#end</a> ($facet.count)</li> | |||
| #end | |||
| </ul> | |||
| </div> | |||
| #end | |||
| #end | |||
| #end ## end foreach field in facetFields | |||
| #end ## end if facetFields > 0 | |||
| @@ -0,0 +1,29 @@ | |||
| ## Page footer: admin link, debug toggle, XML results link, Solr home page link, and a panel of class "debug" that prints the escaped request and the escaped response debug section. | |||
| <hr/> | |||
| <div> | |||
| <div id="admin"><a href="#url_root/index.html#/#{core_name}">Solr Admin</a></div> | |||
| <a href="#" onclick='jQuery(".debug").toggle(); return false;'>toggle debug mode</a> | |||
| <a href="#url_for_lens&wt=xml#if($debug)&debug=true#end">XML results</a> ## TODO: Add links for other formats, maybe dynamically? | |||
| </div> | |||
| <div> | |||
| <a href="http://lucene.apache.org/solr">Solr Home Page</a> | |||
| </div> | |||
| <div class="debug"> | |||
| <hr/> | |||
| Request: | |||
| <pre> | |||
| $esc.html($request) | |||
| </pre> | |||
| <hr/> | |||
| Debug: | |||
| <pre> | |||
| $esc.html($response.response.debug) | |||
| </pre> | |||
| </div> | |||
| @@ -0,0 +1,290 @@ | |||
| <title>Solr browse: #core_name</title> | |||
| <meta http-equiv="content-type" content="text/html; charset=UTF-8"/> | |||
| <link rel="icon" type="image/x-icon" href="#{url_root}/img/favicon.ico"/> | |||
| <link rel="shortcut icon" type="image/x-icon" href="#{url_root}/img/favicon.ico"/> | |||
| <script type="text/javascript" src="#{url_root}/libs/jquery-3.4.1.min.js"></script> | |||
| <script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/js/jquery.tx3-tag-cloud.js&contentType=text/javascript"></script> | |||
| <script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/js/dropit.js&contentType=text/javascript"></script> | |||
| <script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/js/jquery.autocomplete.js&contentType=text/javascript"></script> | |||
| <script type="text/javascript"> | |||
| $(document).ready(function() { | |||
| $("#tagcloud").tx3TagCloud({ | |||
| multiplier: 1 | |||
| }); | |||
| $('.menu').dropit(); | |||
| $( document ).ajaxComplete(function() { | |||
| $("#tagcloud").tx3TagCloud({ | |||
| multiplier: 5 | |||
| }); | |||
| }); | |||
| $('\#q').keyup(function() { | |||
| $('#browse_results').load('#{url_for_home}?#lensNoQ&v.layout.enabled=false&v.template=results&q='+encodeURI($('\#q').val())); | |||
| $("\#q").autocomplete('#{url_for_solr}/suggest', { | |||
| extraParams: { | |||
| 'suggest.q': function() { return $("\#q").val();}, | |||
| 'suggest.build': 'true', | |||
| 'wt': 'json', | |||
| } | |||
| }).keydown(function(e) { | |||
| if (e.keyCode === 13){ | |||
| $("#query-form").trigger('submit'); | |||
| } | |||
| }); | |||
| }); | |||
| }); | |||
| </script> | |||
| <style> | |||
| /* Page-wide defaults */ | |||
| html { | |||
|   background-color: #F0F8FF; | |||
| } | |||
| body { | |||
|   font-family: Helvetica, Arial, sans-serif; | |||
|   font-size: 10pt; | |||
| } | |||
| #header { | |||
|   width: 100%; | |||
|   font-size: 20pt; | |||
| } | |||
| #header2 { | |||
|   margin-left:1200px; | |||
| } | |||
| #logo { | |||
|   width: 115px; | |||
|   margin: 0px 0px 0px 0px; | |||
|   border-style: none; | |||
| } | |||
| a { | |||
|   color: #305CB3; | |||
| } | |||
| a.hidden { | |||
|   display:none; | |||
| } | |||
| em { | |||
|   color: #FF833D; | |||
| } | |||
| .error { | |||
|   color: white; | |||
|   background-color: red; | |||
|   left: 210px; | |||
|   width:80%; | |||
|   position: relative; | |||
| } | |||
| /* Debug output stays hidden until toggled */ | |||
| .debug { display: none; font-size: 10pt} | |||
| #debug_query { | |||
|   font-family: Helvetica, Arial, sans-serif; | |||
|   font-size: 10pt; | |||
|   font-weight: bold; | |||
| } | |||
| #parsed_query { | |||
|   /* "monospace" is the generic family keyword; "monospaced" was read as an ordinary font name */ | |||
|   font-family: Courier, Courier New, monospace; | |||
|   font-size: 10pt; | |||
|   font-weight: normal; | |||
| } | |||
| #admin { | |||
|   text-align: right; | |||
|   vertical-align: top; | |||
| } | |||
| /* Query form */ | |||
| #query-form { | |||
|   width: 90%; | |||
| } | |||
| #query-box { | |||
|   padding: 5px; | |||
|   margin: 5px; | |||
|   font-weight: normal; | |||
|   font-size: 24px; | |||
|   letter-spacing: 0.08em; | |||
| } | |||
| #constraints { | |||
|   margin: 10px; | |||
| } | |||
| /* Tabs */ | |||
| #tabs { } | |||
| #tabs li { display: inline; font-size: 10px;} | |||
| #tabs li a { border-radius: 20px; border: 2px solid #C1CDCD; padding: 10px;color: #42454a; background-color: #dedbde;} | |||
| #tabs li a:hover { background-color: #f1f0ee; } | |||
| #tabs li a.selected { color: #000; background-color: #f1f0ee; font-weight: bold; padding: 5px } | |||
| #tabs li a.no_results { color: #000; background-color: #838B8B; font-style: italic; padding: 5px; pointer-events: none; | |||
|   cursor: default; text-decoration: none;} | |||
| .pagination { | |||
|   width: 305px; | |||
|   border-radius: 25px; | |||
|   border: 2px solid #C1CDCD; | |||
|   padding: 20px; | |||
|   padding-left: 10%; | |||
|   background: #eee; | |||
|   margin-left: 190px; | |||
|   margin-top : 42px; | |||
|   padding-top: 5px; | |||
|   padding-bottom: 5px; | |||
|   text-align:left; | |||
| } | |||
| /* Result list */ | |||
| #results_list { width: 70%; } | |||
| .result-document { | |||
|   border-radius: 25px; | |||
|   border: 2px solid #C1CDCD; | |||
|   padding: 10px; | |||
|   /* width: 800px; */ | |||
|   /* height: 120px; */ | |||
|   margin: 5px; | |||
|   /* margin-left: 60px; */ | |||
|   /* margin-right: 210px; */ | |||
|   /* margin-bottom: 15px; */ | |||
|   transition: 1s ease; | |||
| } | |||
| .result-document:hover | |||
| { | |||
|   /* vendor prefix needs its leading dash to be recognised */ | |||
|   -webkit-transform: scale(1.1); | |||
|   -ms-transform: scale(1.1); | |||
|   transform: scale(1.1); | |||
|   transition: 1s ease; | |||
| } | |||
| .result-document div { | |||
|   padding: 5px; | |||
| } | |||
| .result-title { | |||
|   width:60%; | |||
| } | |||
| .result-body { | |||
|   background: #ddd; | |||
| } | |||
| .result-document:nth-child(2n+1) { | |||
|   background-color: #FFFFFD; | |||
| } | |||
| /* Facets */ | |||
| #facets { | |||
|   margin: 5px; | |||
|   margin-top: 0px; | |||
|   padding: 5px; | |||
|   top: -20px; | |||
|   position: relative; | |||
|   float: right; | |||
|   width: 25%; | |||
| } | |||
| .facet-field { | |||
|   font-weight: bold; | |||
| } | |||
| #facets ul { | |||
|   list-style: none; | |||
|   margin: 0; | |||
|   margin-bottom: 5px; | |||
|   margin-top: 5px; | |||
|   padding-left: 10px; | |||
| } | |||
| #facets ul li { | |||
|   color: #999; | |||
|   padding: 2px; | |||
| } | |||
| div.facet_field { | |||
|   clear: left; | |||
| } | |||
| /* Tag cloud */ | |||
| ul.tx3-tag-cloud { } | |||
| ul.tx3-tag-cloud li { | |||
|   display: block; | |||
|   float: left; | |||
|   list-style: none; | |||
|   margin-right: 4px; | |||
| } | |||
| ul.tx3-tag-cloud li a { | |||
|   display: block; | |||
|   text-decoration: none; | |||
|   color: #c9c9c9; | |||
|   padding: 3px 10px; | |||
| } | |||
| ul.tx3-tag-cloud li a:hover { | |||
|   color: #000000; | |||
|   -webkit-transition: color 250ms linear; | |||
|   -moz-transition: color 250ms linear; | |||
|   -o-transition: color 250ms linear; | |||
|   -ms-transition: color 250ms linear; | |||
|   transition: color 250ms linear; | |||
| } | |||
| /* Drop-down menus */ | |||
| .dropit { | |||
|   list-style: none; | |||
|   padding: 0; | |||
|   margin: 0; | |||
| } | |||
| .dropit .dropit-trigger { position: relative; } | |||
| .dropit .dropit-submenu { | |||
|   position: absolute; | |||
|   top: 100%; | |||
|   left: 0; /* dropdown left or right */ | |||
|   z-index: 1000; | |||
|   display: none; | |||
|   min-width: 150px; | |||
|   list-style: none; | |||
|   padding: 0; | |||
|   margin: 0; | |||
| } | |||
| .dropit .dropit-open .dropit-submenu { display: block; } | |||
| /* autocomplete css (was an HTML comment, whose text became part of the next selector) */ | |||
| .ac_results { | |||
|   padding: 0px; | |||
|   border: 1px solid black; | |||
|   background-color: white; | |||
|   overflow: hidden; | |||
|   z-index: 99999; | |||
| } | |||
| .ac_results ul { | |||
|   width: 100%; | |||
|   list-style-position: outside; | |||
|   list-style: none; | |||
|   padding: 0; | |||
|   margin: 0; | |||
| } | |||
| .ac_results li { | |||
|   margin: 0px; | |||
|   padding: 2px 5px; | |||
|   cursor: default; | |||
|   display: block; | |||
|   font: menu; | |||
|   font-size: 12px; | |||
|   line-height: 16px; | |||
|   overflow: hidden; | |||
| } | |||
| .ac_loading { | |||
|   /* background: white url('˜indicator.gif') right center no-repeat; */ | |||
| } | |||
| .ac_odd { | |||
|   background-color: #eee; | |||
| } | |||
| .ac_over { | |||
|   background-color: #0A246A; | |||
|   color: white; | |||
| } | |||
| </style> | |||
| @@ -0,0 +1,77 @@ | |||
| ## Renders one search hit: file-type icon, title, id, highlighted content snippets, | |||
| ## and two panels (score explanation, all fields) that start hidden. | |||
| #set($docId = $doc.getFirstValue($request.schema.uniqueKeyField.name)) | |||
| ## Load Mime-Type List and Mapping | |||
| #parse('mime_type_lists.vm') | |||
| ## Title | |||
| #if($doc.getFieldValue('title')) | |||
| #set($title = $esc.html($doc.getFirstValue('title'))) | |||
| #else | |||
| ## Fall back to the part of the id after its last slash. | |||
| #set($title = "$doc.getFirstValue('id').substring($math.add(1,$doc.getFirstValue('id').lastIndexOf('/')))") | |||
| ## Escape the fallback as well, so both branches produce HTML-safe text. | |||
| #set($title = $esc.html($title)) | |||
| #end | |||
| ## Date | |||
| #if($doc.getFieldValue('attr_meta_creation_date')) | |||
| #set($date = $esc.html($doc.getFirstValue('attr_meta_creation_date'))) | |||
| #else | |||
| #set($date = "No date found") | |||
| #end | |||
| ## URL | |||
| #if($doc.getFieldValue('url')) | |||
| #set($url = $doc.getFieldValue('url')) | |||
| #elseif($doc.getFieldValue('resourcename')) | |||
| #set($url = "file:///$doc.getFirstValue('resourcename')") | |||
| #else | |||
| #set($url = "$doc.getFieldValue('id')") | |||
| #end | |||
| ## Sort out Mime-Type | |||
| ## Order of preference: mapped mime type, then file extension, then the generic "file". | |||
| #set($ct = $doc.getFirstValue('content_type').split(";").get(0)) | |||
| #set($filename = $doc.getFirstValue('resourcename')) | |||
| #set($filetype = false) | |||
| #set($filetype = $mimeExtensionsMap.get($ct)) | |||
| #if(!$filetype) | |||
| #set($filetype = $filename.substring($filename.lastIndexOf(".")).substring(1)) | |||
| #end | |||
| #if(!$filetype) | |||
| #set($filetype = "file") | |||
| #end | |||
| #if(!$supportedMimeTypes.contains($filetype)) | |||
| #set($filetype = "file") | |||
| #end | |||
| <div class="result-document"> | |||
| <span class="result-title"> | |||
| <img src="#{url_root}/img/filetypes/${filetype}.png" align="center"> | |||
| <b>$title</b> | |||
| </span> | |||
| <div> | |||
| ## The id is index data, so it is escaped; the closing-style br tag was invalid markup. | |||
| id: $esc.html($docId) <br/> | |||
| </div> | |||
| ## Snippets are escaped first; only the highlight markers are then turned into em tags. | |||
| #set($pad = "") | |||
| #foreach($v in $response.response.highlighting.get($docId).get("content")) | |||
| $pad$esc.html($v).replace("HL_START","<em>").replace("HL_END","</em>") | |||
| #set($pad = " ... ") | |||
| #end | |||
| </div> | |||
| <a href="#" class="debug" onclick='jQuery(this).next().toggle(); return false;'>toggle explain</a> | |||
| <pre style="display: none;"> | |||
| $esc.html($response.getExplainMap().get($doc.getFirstValue('id'))) | |||
| </pre> | |||
| <a href="#" class="debug" onclick='jQuery(this).next().toggle(); return false;'>show all fields</a> | |||
| <pre style="display:none;"> | |||
| #foreach($fieldname in $doc.fieldNames) | |||
| <span>$esc.html($fieldname) :</span> | |||
| <span>#foreach($value in $doc.getFieldValues($fieldname))$esc.html($value)#end</span> | |||
| #end | |||
| </pre> | |||
| @@ -0,0 +1,97 @@ | |||
| /* | |||
| * Dropit v1.1.0 | |||
| * http://dev7studios.com/dropit | |||
| * | |||
| * Copyright 2012, Dev7studios | |||
| * Free to use and abuse under the MIT license. | |||
| * http://www.opensource.org/licenses/mit-license.php | |||
| */ | |||
| ;(function($) { | |||
| // jQuery plugin entry point. Called with no argument or an options object it runs init; | |||
| // called with the name of an entry in "methods" it runs that method with the remaining arguments; | |||
| // any other argument is reported through $.error. | |||
| $.fn.dropit = function(method) { | |||
| var methods = { | |||
| // Merges the options over the defaults into the shared $.fn.dropit.settings object, | |||
| // then wires up every matched element. | |||
| init : function(options) { | |||
| this.dropit.settings = $.extend({}, this.dropit.defaults, options); | |||
| return this.each(function() { | |||
| var $el = $(this), | |||
| el = this, | |||
| settings = $.fn.dropit.settings; | |||
| // Hide initial submenus | |||
| $el.addClass('dropit') | |||
| .find('>'+ settings.triggerParentEl +':has('+ settings.submenuEl +')').addClass('dropit-trigger') | |||
| .find(settings.submenuEl).addClass('dropit-submenu').hide(); | |||
| // Open on click | |||
| // (the event name is settings.action; the handler is delegated to trigger elements whose parent contains a submenu) | |||
| $el.off(settings.action).on(settings.action, settings.triggerParentEl +':has('+ settings.submenuEl +') > '+ settings.triggerEl +'', function(){ | |||
| // Close click menu's if clicked again | |||
| if(settings.action == 'click' && $(this).parents(settings.triggerParentEl).hasClass('dropit-open')){ | |||
| settings.beforeHide.call(this); | |||
| $(this).parents(settings.triggerParentEl).removeClass('dropit-open').find(settings.submenuEl).hide(); | |||
| settings.afterHide.call(this); | |||
| return false; | |||
| } | |||
| // Hide open menus | |||
| settings.beforeHide.call(this); | |||
| $('.dropit-open').removeClass('dropit-open').find('.dropit-submenu').hide(); | |||
| settings.afterHide.call(this); | |||
| // Open this menu | |||
| settings.beforeShow.call(this); | |||
| $(this).parents(settings.triggerParentEl).addClass('dropit-open').find(settings.submenuEl).show(); | |||
| settings.afterShow.call(this); | |||
| return false; | |||
| }); | |||
| // Close if outside click | |||
| // (this document-level handler is added once per matched element) | |||
| $(document).on('click', function(){ | |||
| settings.beforeHide.call(this); | |||
| $('.dropit-open').removeClass('dropit-open').find('.dropit-submenu').hide(); | |||
| settings.afterHide.call(this); | |||
| }); | |||
| // If hover | |||
| if(settings.action == 'mouseenter'){ | |||
| $el.on('mouseleave', '.dropit-open', function(){ | |||
| settings.beforeHide.call(this); | |||
| $(this).removeClass('dropit-open').find(settings.submenuEl).hide(); | |||
| settings.afterHide.call(this); | |||
| }); | |||
| } | |||
| settings.afterLoad.call(this); | |||
| }); | |||
| } | |||
| }; | |||
| // Dispatch on the type of the first argument. | |||
| if (methods[method]) { | |||
| return methods[method].apply(this, Array.prototype.slice.call(arguments, 1)); | |||
| } else if (typeof method === 'object' || !method) { | |||
| return methods.init.apply(this, arguments); | |||
| } else { | |||
| $.error( 'Method "' + method + '" does not exist in dropit plugin!'); | |||
| } | |||
| }; | |||
| // Default options; init copies these and overlays the caller's options. | |||
| $.fn.dropit.defaults = { | |||
| action: 'mouseenter', // The open action for the trigger | |||
| submenuEl: 'ul', // The submenu element | |||
| triggerEl: 'a', // The trigger element | |||
| triggerParentEl: 'li', // The trigger parent element | |||
| afterLoad: function(){}, // Triggers when plugin has loaded | |||
| beforeShow: function(){}, // Triggers before submenu is shown | |||
| afterShow: function(){}, // Triggers after submenu is shown | |||
| beforeHide: function(){}, // Triggers before submenu is hidden | |||
| afterHide: function(){} // Triggers after submenu is hidden | |||
| }; | |||
| // Holds the options of the most recent init call. | |||
| $.fn.dropit.settings = {}; | |||
| })(jQuery); | |||
| @@ -0,0 +1,763 @@ | |||
| /* | |||
| * Autocomplete - jQuery plugin 1.1pre | |||
| * | |||
| * Copyright (c) 2007 Dylan Verheul, Dan G. Switzer, Anjesh Tuladhar, Jörn Zaefferer | |||
| * | |||
| * Dual licensed under the MIT and GPL licenses: | |||
| * http://www.opensource.org/licenses/mit-license.php | |||
| * http://www.gnu.org/licenses/gpl.html | |||
| * | |||
| * Revision: Id: jquery.autocomplete.js 5785 2008-07-12 10:37:33Z joern.zaefferer $ | |||
| * | |||
| */ | |||
| ;(function($) { | |||
| // Public jQuery methods of the autocomplete plugin. | |||
| $.fn.extend({ | |||
| // Creates one $.Autocompleter per matched element. | |||
| // urlOrData: a string is used as the request url, anything else as local data. | |||
| // options: merged last, so caller values override both the defaults and the derived values below. | |||
| autocomplete: function(urlOrData, options) { | |||
| var isUrl = typeof urlOrData == "string"; | |||
| // Derived values: the default delay for a url and 10 for local data; | |||
| // max is 10 when options are given with a falsy scroll, otherwise 150. | |||
| options = $.extend({}, $.Autocompleter.defaults, { | |||
| url: isUrl ? urlOrData : null, | |||
| data: isUrl ? null : urlOrData, | |||
| delay: isUrl ? $.Autocompleter.defaults.delay : 10, | |||
| max: options && !options.scroll ? 10 : 150 | |||
| }, options); | |||
| // if highlight is set to false, replace it with a do-nothing function | |||
| options.highlight = options.highlight || function(value) { return value; }; | |||
| // if the formatMatch option is not specified, then use formatItem for backwards compatibility | |||
| options.formatMatch = options.formatMatch || options.formatItem; | |||
| return this.each(function() { | |||
| new $.Autocompleter(this, options); | |||
| }); | |||
| }, | |||
| // Registers a handler for the "result" event. | |||
| result: function(handler) { | |||
| return this.bind("result", handler); | |||
| }, | |||
| // Triggers the "search" event, passing the handler as its extra argument. | |||
| search: function(handler) { | |||
| return this.trigger("search", [handler]); | |||
| }, | |||
| // Triggers the "flushCache" event. | |||
| flushCache: function() { | |||
| return this.trigger("flushCache"); | |||
| }, | |||
| // Triggers the "setOptions" event with the given options. | |||
| setOptions: function(options){ | |||
| return this.trigger("setOptions", [options]); | |||
| }, | |||
| // Triggers the "unautocomplete" event. | |||
| unautocomplete: function() { | |||
| return this.trigger("unautocomplete"); | |||
| } | |||
| }); | |||
| $.Autocompleter = function(input, options) { | |||
| var KEY = { | |||
| UP: 38, | |||
| DOWN: 40, | |||
| DEL: 46, | |||
| TAB: 9, | |||
| RETURN: 13, | |||
| ESC: 27, | |||
| COMMA: 188, | |||
| PAGEUP: 33, | |||
| PAGEDOWN: 34, | |||
| BACKSPACE: 8 | |||
| }; | |||
| // Create $ object for input element | |||
| var $input = $(input).attr("autocomplete", "off").addClass(options.inputClass); | |||
| var timeout; | |||
| var previousValue = ""; | |||
| var cache = $.Autocompleter.Cache(options); | |||
| var hasFocus = 0; | |||
| var lastKeyPressCode; | |||
| var config = { | |||
| mouseDownOnSelect: false | |||
| }; | |||
| var select = $.Autocompleter.Select(options, input, selectCurrent, config); | |||
| var blockSubmit; | |||
| // prevent form submit in opera when selecting with return key | |||
| $.browser.opera && $(input.form).bind("submit.autocomplete", function() { | |||
| if (blockSubmit) { | |||
| blockSubmit = false; | |||
| return false; | |||
| } | |||
| }); | |||
| // only opera doesn't trigger keydown multiple times while pressed, others don't work with keypress at all | |||
| $input.bind(($.browser.opera ? "keypress" : "keydown") + ".autocomplete", function(event) { | |||
| // track last key pressed | |||
| lastKeyPressCode = event.keyCode; | |||
| switch(event.keyCode) { | |||
| case KEY.UP: | |||
| event.preventDefault(); | |||
| if ( select.visible() ) { | |||
| select.prev(); | |||
| } else { | |||
| onChange(0, true); | |||
| } | |||
| break; | |||
| case KEY.DOWN: | |||
| event.preventDefault(); | |||
| if ( select.visible() ) { | |||
| select.next(); | |||
| } else { | |||
| onChange(0, true); | |||
| } | |||
| break; | |||
| case KEY.PAGEUP: | |||
| event.preventDefault(); | |||
| if ( select.visible() ) { | |||
| select.pageUp(); | |||
| } else { | |||
| onChange(0, true); | |||
| } | |||
| break; | |||
| case KEY.PAGEDOWN: | |||
| event.preventDefault(); | |||
| if ( select.visible() ) { | |||
| select.pageDown(); | |||
| } else { | |||
| onChange(0, true); | |||
| } | |||
| break; | |||
| // matches also semicolon | |||
| case options.multiple && $.trim(options.multipleSeparator) == "," && KEY.COMMA: | |||
| case KEY.TAB: | |||
| case KEY.RETURN: | |||
| if( selectCurrent() ) { | |||
| // stop default to prevent a form submit, Opera needs special handling | |||
| event.preventDefault(); | |||
| blockSubmit = true; | |||
| return false; | |||
| } | |||
| break; | |||
| case KEY.ESC: | |||
| select.hide(); | |||
| break; | |||
| default: | |||
| clearTimeout(timeout); | |||
| timeout = setTimeout(onChange, options.delay); | |||
| break; | |||
| } | |||
| }).focus(function(){ | |||
| // track whether the field has focus, we shouldn't process any | |||
| // results if the field no longer has focus | |||
| hasFocus++; | |||
| }).blur(function() { | |||
| hasFocus = 0; | |||
| if (!config.mouseDownOnSelect) { | |||
| hideResults(); | |||
| } | |||
| }).click(function() { | |||
| // show select when clicking in a focused field | |||
| if ( hasFocus++ > 1 && !select.visible() ) { | |||
| onChange(0, true); | |||
| } | |||
| }).bind("search", function() { | |||
| // TODO why not just specifying both arguments? | |||
| var fn = (arguments.length > 1) ? arguments[1] : null; | |||
| function findValueCallback(q, data) { | |||
| var result; | |||
| if( data && data.length ) { | |||
| for (var i=0; i < data.length; i++) { | |||
| if( data[i].result.toLowerCase() == q.toLowerCase() ) { | |||
| result = data[i]; | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| if( typeof fn == "function" ) fn(result); | |||
| else $input.trigger("result", result && [result.data, result.value]); | |||
| } | |||
| $.each(trimWords($input.val()), function(i, value) { | |||
| request(value, findValueCallback, findValueCallback); | |||
| }); | |||
| }).bind("flushCache", function() { | |||
| cache.flush(); | |||
| }).bind("setOptions", function() { | |||
| $.extend(options, arguments[1]); | |||
| // if we've updated the data, repopulate | |||
| if ( "data" in arguments[1] ) | |||
| cache.populate(); | |||
| }).bind("unautocomplete", function() { | |||
| select.unbind(); | |||
| $input.unbind(); | |||
| $(input.form).unbind(".autocomplete"); | |||
| }); | |||
| function selectCurrent() { | |||
| var selected = select.selected(); | |||
| if( !selected ) | |||
| return false; | |||
| var v = selected.result; | |||
| previousValue = v; | |||
| if ( options.multiple ) { | |||
| var words = trimWords($input.val()); | |||
| if ( words.length > 1 ) { | |||
| v = words.slice(0, words.length - 1).join( options.multipleSeparator ) + options.multipleSeparator + v; | |||
| } | |||
| v += options.multipleSeparator; | |||
| } | |||
| $input.val(v); | |||
| hideResultsNow(); | |||
| $input.trigger("result", [selected.data, selected.value]); | |||
| return true; | |||
| } | |||
| function onChange(crap, skipPrevCheck) { | |||
| if( lastKeyPressCode == KEY.DEL ) { | |||
| select.hide(); | |||
| return; | |||
| } | |||
| var currentValue = $input.val(); | |||
| if ( !skipPrevCheck && currentValue == previousValue ) | |||
| return; | |||
| previousValue = currentValue; | |||
| currentValue = lastWord(currentValue); | |||
| if ( currentValue.length >= options.minChars) { | |||
| $input.addClass(options.loadingClass); | |||
| if (!options.matchCase) | |||
| currentValue = currentValue.toLowerCase(); | |||
| request(currentValue, receiveData, hideResultsNow); | |||
| } else { | |||
| stopLoading(); | |||
| select.hide(); | |||
| } | |||
| }; | |||
/**
 * Splits the value at options.multipleSeparator and trims every piece.
 *
 * Blank pieces are left out, but the remaining ones keep their original
 * index, so the returned array can contain holes. An empty value yields [""].
 */
function trimWords(value) {
    if (!value) {
        return [""];
    }

    var trimmed = [];
    var pieces = value.split(options.multipleSeparator);
    for (var idx = 0; idx < pieces.length; idx++) {
        var piece = $.trim(pieces[idx]);
        if (piece) {
            trimmed[idx] = piece;
        }
    }
    return trimmed;
}
/**
 * Returns the part of the value that is currently being completed.
 *
 * Without options.multiple this is the whole value. In multiple mode it is
 * the last non-blank word produced by trimWords().
 *
 * @param {string} value current content of the input
 * @returns {string} the last word, or "" when the value holds no word at all
 */
function lastWord(value) {
    if ( !options.multiple )
        return value;
    var words = trimWords(value);
    // trimWords() returns an empty array for input made only of whitespace or
    // separators; fall back to "" so callers can safely use .length and
    // .toLowerCase() on the result instead of hitting undefined.
    return words[words.length - 1] || "";
}
/**
 * Fills the input box with the first match (assumed to be the best match).
 *
 * @param q      the term that was searched for
 * @param sValue the first matching result
 */
function autoFill(q, sValue){
    if (!options.autoFill) {
        return;
    }
    // only complete while the user has not typed anything beyond the queried term
    if (lastWord($input.val()).toLowerCase() != q.toLowerCase()) {
        return;
    }
    // never autofill right after a backspace, the user is deleting text
    if (lastKeyPressCode == KEY.BACKSPACE) {
        return;
    }

    // append the missing tail of the match (the case the user typed is kept)
    var typedLength = lastWord(previousValue).length;
    $input.val($input.val() + sValue.substring(typedLength));

    // select the appended portion so the next typed character replaces it
    var selectionStart = previousValue.length;
    $.Autocompleter.Selection(input, selectionStart, selectionStart + sValue.length);
};
/**
 * Schedules the result list to be hidden after a short delay, replacing any
 * hide that was scheduled earlier.
 */
function hideResults() {
    var delayMs = 200;
    clearTimeout(timeout);
    timeout = setTimeout(hideResultsNow, delayMs);
};
/**
 * Hides the result list immediately and cancels any pending delayed hide.
 *
 * With options.mustMatch a search is run for the current input; when it
 * reports no result the unmatched text is removed (in multiple mode only the
 * last word, otherwise the whole value). If the list was visible, the cursor
 * is placed at the end of the input afterwards.
 */
function hideResultsNow() {
    var wasVisible = select.visible();
    select.hide();
    clearTimeout(timeout);
    stopLoading();

    if (options.mustMatch) {
        $input.search(function (result) {
            if (result) {
                return;
            }
            if (!options.multiple) {
                $input.val( "" );
                return;
            }
            // drop the last (unmatched) word and keep the rest with a trailing separator
            var kept = trimWords($input.val()).slice(0, -1);
            var tail = kept.length ? options.multipleSeparator : "";
            $input.val( kept.join(options.multipleSeparator) + tail );
        });
    }

    if (wasVisible) {
        // position cursor at end of input field
        var endOfInput = input.value.length;
        $.Autocompleter.Selection(input, endOfInput, input.value.length);
    }
};
/**
 * Success callback of a lookup: shows the received rows, or hides the list
 * when there are none or the input no longer has focus.
 *
 * @param q    the term that was searched for
 * @param data the rows found for that term
 */
function receiveData(q, data) {
    var usable = data && data.length && hasFocus;
    if (!usable) {
        hideResultsNow();
        return;
    }

    stopLoading();
    select.display(data, q);
    autoFill(q, data[0].value);
    select.show();
};
/**
 * Looks up suggestions for a term.
 *
 * @param term    the text to search for (lower-cased unless options.matchCase is set)
 * @param success called as success(term, rows) when rows are available
 * @param failure called as failure(term) when no url is configured
 */
function request(term, success, failure) {
    if (!options.matchCase)
        term = term.toLowerCase();
    var data = cache.load(term);
    data = null; // Avoid buggy cache and go to Solr every time
    // receive the cached data (unreachable while the cache result is discarded above)
    if (data && data.length) {
        success(term, data);
    // if an AJAX url has been supplied, try loading the data now
    } else if( (typeof options.url == "string") && (options.url.length > 0) ){
        var extraParams = {
            // changes on every request, which keeps the URL unique
            timestamp: +new Date()
        };
        $.each(options.extraParams, function(key, param) {
            extraParams[key] = typeof param == "function" ? param() : param;
        });
        $.ajax({
            // try to leverage ajaxQueue plugin to abort previous requests
            mode: "abort",
            // limit abortion to this input
            port: "autocomplete" + input.name,
            dataType: options.dataType,
            url: options.url,
            data: $.extend({
                q: lastWord(term),
                limit: options.max
            }, extraParams),
            success: function(data) {
                var parsed = options.parse && options.parse(data) || parse(data);
                cache.add(term, parsed);
                success(term, parsed);
            },
            error: function(xhr, textStatus) {
                // A failed request would otherwise leave the loading indicator on
                // forever. Aborted requests are skipped because a newer request
                // for the same input is normally already in flight.
                if (textStatus != "abort")
                    stopLoading();
            }
        });
    } else {
        // if we have a failure, we need to empty the list -- this prevents the [TAB] key from selecting the last successful match
        select.emptyList();
        failure(term);
    }
};
/**
 * Default parser for a plain-text response: one row per line, fields
 * separated by "|". Blank lines are ignored.
 *
 * @param data the raw response text
 * @returns array of { data, value, result } rows; value is the first field and
 *          result is options.formatResult(fields, value) when that yields a
 *          truthy value, otherwise the first field
 */
function parse(data) {
    var entries = [];
    var lines = data.split("\n");
    for (var n = 0; n < lines.length; n++) {
        var line = $.trim(lines[n]);
        if (!line) {
            continue;
        }
        var fields = line.split("|");
        var first = fields[0];
        entries.push({
            data: fields,
            value: first,
            result: options.formatResult && options.formatResult(fields, first) || first
        });
    }
    return entries;
};
/** Removes the loading CSS class (options.loadingClass) from the input. */
function stopLoading() {
    var busyClass = options.loadingClass;
    $input.removeClass(busyClass);
};
| }; | |||
/**
 * Default option values of the autocompleter.
 */
$.Autocompleter.defaults = {
    // CSS class names
    inputClass: "ac_input",
    resultsClass: "ac_results",
    loadingClass: "ac_loading",

    // lookup behaviour
    minChars: 1,
    delay: 400,
    matchCase: false,
    matchSubset: true,
    matchContains: false,
    cacheLength: 10,
    max: 100,
    mustMatch: false,
    extraParams: {},
    selectFirst: false,

    // formatting hooks
    formatItem: function(row) {
        return row[0];
    },
    formatMatch: null,

    autoFill: false,
    width: 0,
    multiple: false,
    multipleSeparator: ", ",

    // wraps every occurrence of the term in <strong>, leaving HTML tags and entities untouched
    highlight: function(value, term) {
        var escapedTerm = term.replace(/([\^\$\(\)\[\]\{\}\*\.\+\?\|\\])/gi, "\\$1");
        var pattern = new RegExp("(?![^&;]+;)(?!<[^<>]*)(" + escapedTerm + ")(?![^<>]*>)(?![^&;]+;)", "gi");
        return value.replace(pattern, "<strong>$1</strong>");
    },

    // result list scrolling
    scroll: true,
    scrollHeight: 180
};
/**
 * Cache of lookup results, keyed by search term.
 *
 * When options.data holds local data, the cache is pre-populated with one
 * entry per first character plus an entry under "" for the first options.max rows.
 *
 * @param options the autocompleter options (cacheLength, matchCase,
 *                matchContains, matchSubset, minChars, max, data, url,
 *                formatMatch, formatResult are read here)
 * @returns object with flush(), add(q, value), populate() and load(q)
 */
$.Autocompleter.Cache = function(options) {
    var data = {};
    var length = 0;

    // Escapes regular-expression metacharacters so typed text is matched literally.
    function escapeRegExp(text) {
        return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    }

    /**
     * Tells whether sub matches s: at the start of s, or anywhere when
     * options.matchContains is set (at a word boundary for "word").
     */
    function matchSubset(s, sub) {
        if (!options.matchCase)
            s = s.toLowerCase();
        var i = s.indexOf(sub);
        if (options.matchContains == "word"){
            // The typed text must be escaped: unescaped input such as "(" makes
            // the pattern invalid (SyntaxError) and "." would match any character.
            i = s.toLowerCase().search(new RegExp("\\b" + escapeRegExp(sub.toLowerCase())));
        }
        if (i == -1) return false;
        return i == 0 || options.matchContains;
    };

    /** Stores value under q; the whole cache is flushed first once it has grown past options.cacheLength. */
    function add(q, value) {
        if (length > options.cacheLength){
            flush();
        }
        if (!data[q]){
            length++;
        }
        data[q] = value;
    }

    /** Builds the lookup structure from options.data; returns false when there is no local data. */
    function populate(){
        if( !options.data ) return false;
        // track the matches
        var stMatchSets = {},
            nullData = 0;

        // no url was specified, we need to adjust the cache length to make sure it fits the local data store
        if( !options.url ) options.cacheLength = 1;

        // track all options for minChars = 0
        stMatchSets[""] = [];

        // loop through the array and create a lookup structure
        for ( var i = 0, ol = options.data.length; i < ol; i++ ) {
            var rawValue = options.data[i];
            // if rawValue is a string, make an array otherwise just reference the array
            rawValue = (typeof rawValue == "string") ? [rawValue] : rawValue;

            var value = options.formatMatch(rawValue, i+1, options.data.length);
            if ( value === false )
                continue;

            var firstChar = value.charAt(0).toLowerCase();
            // if no lookup array for this character exists, create it now
            if( !stMatchSets[firstChar] )
                stMatchSets[firstChar] = [];

            var row = {
                value: value,
                data: rawValue,
                result: options.formatResult && options.formatResult(rawValue) || value
            };

            // push the current match into the set list
            stMatchSets[firstChar].push(row);

            // keep track of minChars zero items
            if ( nullData++ < options.max ) {
                stMatchSets[""].push(row);
            }
        }

        // add the data items to the cache
        $.each(stMatchSets, function(i, value) {
            // increase the cache size
            options.cacheLength++;
            // add to the cache
            add(i, value);
        });
    }

    // populate any existing data
    setTimeout(populate, 25);

    /** Empties the cache. */
    function flush(){
        data = {};
        length = 0;
    }

    return {
        flush: flush,
        add: add,
        populate: populate,
        /** Returns the cached rows matching q, or null when nothing usable is cached. */
        load: function(q) {
            if (!options.cacheLength || !length)
                return null;

            /*
             * if dealing w/local data and matchContains then we must make sure
             * to loop through all the data collections looking for matches
             */
            if( !options.url && options.matchContains ){
                // track all matches
                var allMatches = [];
                // loop through all the data grids for matches
                for( var k in data ){
                    // don't search through the stMatchSets[""] (minChars: 0) cache
                    // this prevents duplicates
                    if( k.length > 0 ){
                        $.each(data[k], function(i, x) {
                            // if we've got a match, add it to the array
                            if (matchSubset(x.value, q)) {
                                allMatches.push(x);
                            }
                        });
                    }
                }
                return allMatches;
            }

            // if the exact item exists, use it
            if (data[q]){
                return data[q];
            }

            if (options.matchSubset) {
                // try successively shorter prefixes of q and filter the first cached set found
                for (var i = q.length - 1; i >= options.minChars; i--) {
                    var c = data[q.substr(0, i)];
                    if (c) {
                        var csub = [];
                        $.each(c, function(i, x) {
                            if (matchSubset(x.value, q)) {
                                csub[csub.length] = x;
                            }
                        });
                        return csub;
                    }
                }
            }
            return null;
        }
    };
};
/**
 * The drop-down list of suggestions.
 *
 * @param options the autocompleter options
 * @param input   the DOM input element the list belongs to
 * @param select  callback invoked when the user clicks an entry
 * @param config  shared state object; its mouseDownOnSelect flag is set while
 *                the mouse button is held down on the list
 * @returns object exposing display/next/prev/pageUp/pageDown/hide/visible/
 *          current/show/selected/emptyList/unbind
 */
$.Autocompleter.Select = function (options, input, select, config) {
    var CLASSES = {
        ACTIVE: "ac_over"
    };

    var listItems,
        active = -1,
        data,
        term = "",
        needsInit = true,
        element,
        list;

    // Create results (only once, on first use)
    function init() {
        if (!needsInit)
            return;
        element = $("<div/>")
            .hide()
            .addClass(options.resultsClass)
            .css("position", "absolute")
            .appendTo(document.body);

        list = $("<ul/>").appendTo(element).mouseover( function(event) {
            if(target(event).nodeName && target(event).nodeName.toUpperCase() == 'LI') {
                active = $("li", list).removeClass(CLASSES.ACTIVE).index(target(event));
                $(target(event)).addClass(CLASSES.ACTIVE);
            }
        }).click(function(event) {
            $(target(event)).addClass(CLASSES.ACTIVE);
            select();
            // TODO provide option to avoid setting focus again after selection? useful for cleanup-on-focus
            input.focus();
            return false;
        }).mousedown(function() {
            config.mouseDownOnSelect = true;
        }).mouseup(function() {
            config.mouseDownOnSelect = false;
        });

        if( options.width > 0 )
            element.css("width", options.width);

        needsInit = false;
    }

    // Walks up from the event target to the enclosing LI element.
    function target(event) {
        var element = event.target;
        while(element && element.tagName != "LI")
            element = element.parentNode;
        // more fun with IE, sometimes event.target is empty, just ignore it then
        if(!element)
            return [];
        return element;
    }

    // Moves the highlight by step entries (wrapping around) and scrolls it into view.
    function moveSelect(step) {
        // nothing to move through: avoids dereferencing a missing active item below
        if (!listItems || !listItems.length)
            return;
        listItems.slice(active, active + 1).removeClass(CLASSES.ACTIVE);
        movePosition(step);
        var activeItem = listItems.slice(active, active + 1).addClass(CLASSES.ACTIVE);
        if(options.scroll) {
            var offset = 0;
            listItems.slice(0, active).each(function() {
                offset += this.offsetHeight;
            });
            if((offset + activeItem[0].offsetHeight - list.scrollTop()) > list[0].clientHeight) {
                list.scrollTop(offset + activeItem[0].offsetHeight - list.innerHeight());
            } else if(offset < list.scrollTop()) {
                list.scrollTop(offset);
            }
        }
    };

    function movePosition(step) {
        active += step;
        // .length instead of .size(): .size() no longer exists in jQuery 3
        if (active < 0) {
            active = listItems.length - 1;
        } else if (active >= listItems.length) {
            active = 0;
        }
    }

    function limitNumberOfItems(available) {
        return options.max && options.max < available
            ? options.max
            : available;
    }

    // Rebuilds the list entries from the current data and term.
    function fillList() {
        list.empty();
        var max = limitNumberOfItems(data.length);
        for (var i=0; i < max; i++) {
            if (!data[i])
                continue;
            var formatted = options.formatItem(data[i].data, i+1, max, data[i].value, term);
            if ( formatted === false )
                continue;
            var li = $("<li/>").html( options.highlight(formatted, term) ).addClass(i%2 == 0 ? "ac_even" : "ac_odd").appendTo(list)[0];
            $.data(li, "ac_data", data[i]);
        }
        listItems = list.find("li");
        if ( options.selectFirst ) {
            listItems.slice(0, 1).addClass(CLASSES.ACTIVE);
            active = 0;
        }
        // apply bgiframe if available
        if ( $.fn.bgiframe )
            list.bgiframe();
    }

    return {
        display: function(d, q) {
            init();
            data = d;
            term = q;
            fillList();
        },
        next: function() {
            moveSelect(1);
        },
        prev: function() {
            moveSelect(-1);
        },
        pageUp: function() {
            if (active != 0 && active - 8 < 0) {
                moveSelect( -active );
            } else {
                moveSelect(-8);
            }
        },
        pageDown: function() {
            var count = listItems ? listItems.length : 0;
            if (active != count - 1 && active + 8 > count) {
                moveSelect( count - 1 - active );
            } else {
                moveSelect(8);
            }
        },
        hide: function() {
            element && element.hide();
            listItems && listItems.removeClass(CLASSES.ACTIVE);
            active = -1;
        },
        visible : function() {
            return element && element.is(":visible");
        },
        current: function() {
            return this.visible() && (listItems.filter("." + CLASSES.ACTIVE)[0] || options.selectFirst && listItems[0]);
        },
        show: function() {
            var offset = $(input).offset();
            element.css({
                width: typeof options.width == "string" || options.width > 0 ? options.width : $(input).width(),
                top: offset.top + input.offsetHeight,
                left: offset.left
            }).show();
            if(options.scroll) {
                list.scrollTop(0);
                list.css({
                    maxHeight: options.scrollHeight,
                    overflow: 'auto'
                });

                // $.browser was removed in jQuery 1.9, so check that it exists before reading it
                if($.browser && $.browser.msie && typeof document.body.style.maxHeight === "undefined") {
                    var listHeight = 0;
                    listItems.each(function() {
                        listHeight += this.offsetHeight;
                    });
                    var scrollbarsVisible = listHeight > options.scrollHeight;
                    list.css('height', scrollbarsVisible ? options.scrollHeight : listHeight );
                    if (!scrollbarsVisible) {
                        // IE doesn't recalculate width when scrollbar disappears
                        listItems.width( list.width() - parseInt(listItems.css("padding-left"), 10) - parseInt(listItems.css("padding-right"), 10) );
                    }
                }
            }
        },
        // Returns the data of the highlighted entry and removes the highlight.
        selected: function() {
            var selected = listItems && listItems.filter("." + CLASSES.ACTIVE).removeClass(CLASSES.ACTIVE);
            return selected && selected.length && $.data(selected[0], "ac_data");
        },
        emptyList: function (){
            list && list.empty();
        },
        unbind: function() {
            element && element.remove();
        }
    };
};
/**
 * Selects a range of text in an input field and gives it focus.
 *
 * Uses createTextRange when the field offers it, otherwise
 * setSelectionRange, otherwise the selectionStart/selectionEnd properties.
 *
 * @param field the DOM input element
 * @param start index of the first selected character
 * @param end   index after the last selected character
 */
$.Autocompleter.Selection = function(field, start, end) {
    if( field.createTextRange ){
        var selRange = field.createTextRange();
        selRange.collapse(true);
        selRange.moveStart("character", start);
        selRange.moveEnd("character", end);
        selRange.select();
    } else if( field.setSelectionRange ){
        field.setSelectionRange(start, end);
    } else if( typeof field.selectionStart == "number" ){
        // Test for the property itself: a truthiness check would skip this
        // branch whenever the caret currently sits at position 0.
        field.selectionStart = start;
        field.selectionEnd = end;
    }
    field.focus();
};
| })(jQuery); | |||
| @@ -0,0 +1,70 @@ | |||
| /* | |||
| * ---------------------------------------------------------------------------- | |||
| * "THE BEER-WARE LICENSE" (Revision 42): | |||
| * Tuxes3 wrote this file. As long as you retain this notice you | |||
| * can do whatever you want with this stuff. If we meet some day, and you think | |||
| * this stuff is worth it, you can buy me a beer in return Tuxes3 | |||
| * ---------------------------------------------------------------------------- | |||
| */ | |||
(function($)
{
    var settings;

    /**
     * Turns a list into a tag cloud: every <li> gets a font size between
     * 1em and (1 + multiplier)em according to its data-weight attribute.
     *
     * @param options optional settings; "multiplier" (default 1) scales the size range
     * @returns the jQuery object it was called on, so calls can be chained
     */
    $.fn.tx3TagCloud = function(options)
    {
        //
        // DEFAULT SETTINGS
        //
        settings = $.extend({
            multiplier : 1
        }, options);
        main(this);
        return this;
    }

    function main(element)
    {
        // adding style attr
        element.addClass("tx3-tag-cloud");
        addListElementFontSize(element);
    }

    /**
     * calculates the font size on each li element
     * according to their data-weight attribute
     */
    function addListElementFontSize(element)
    {
        var items = element.find("li");
        var hDataWeight = -9007199254740992;
        var lDataWeight = 9007199254740992;

        // first pass: find the highest and lowest weight
        $.each(items, function(){
            // declared locally; the original leaked this as a global variable
            var cDataWeight = getDataWeight(this);
            // parseInt yields NaN (never undefined) for a missing or non-numeric attribute
            if (isNaN(cDataWeight))
            {
                logWarning("No \"data-weight\" attribut defined on <li> element");
            }
            else
            {
                hDataWeight = cDataWeight > hDataWeight ? cDataWeight : hDataWeight;
                lDataWeight = cDataWeight < lDataWeight ? cDataWeight : lDataWeight;
            }
        });

        // second pass: scale every weighted element between the two extremes
        var range = hDataWeight - lDataWeight;
        $.each(items, function(){
            var dataWeight = getDataWeight(this);
            if (isNaN(dataWeight))
            {
                // no usable weight: leave the font size alone instead of writing "NaNem"
                return;
            }
            // when all weights are equal the range is 0; use the base size rather than dividing by zero
            var percent = range === 0 ? 0 : Math.abs((dataWeight - lDataWeight) / range);
            $(this).css('font-size', (1 + (percent * settings['multiplier'])) + "em");
        });
    }

    /** Reads the data-weight attribute as a base-10 integer (NaN when missing or not numeric). */
    function getDataWeight(element)
    {
        return parseInt($(element).attr("data-weight"), 10);
    }

    function logWarning(message)
    {
        console.log("[WARNING] " + Date.now() + " : " + message);
    }

}(jQuery));
| @@ -0,0 +1,42 @@ | |||
## Page layout: header with logo, locale selector, then either the Solr
## error (code and message) or the rendered page content, then the footer.
<html>
<head>
  #parse("head.vm")
</head>
<body>
  <div id="header">
    <a href="#url_for_home"><img src="#{url_root}/img/solr.svg" id="logo" title="Solr" alt="Solr"/></a> $resource.powered_file_search
  </div>

  <div id="header2" onclick="locale_select()">
    <ul class="menu">
      <li>
        <a href="#"><img src="#{url_for_solr}/admin/file?file=/velocity/img/globe_256.png&contentType=image/png" id="locale_pic" title="locale_select" alt="locale_select" width="30" height="27"/></a>
        <ul>
          ## the entry for the locale that is currently active gets class "hidden";
          ## the flags are decorative (the language name follows), hence the empty alt text
          <li><a href="#url_for_locale('fr_FR')" #if("#current_locale"=="fr_FR")class="hidden"#end>
            <img src="#{url_for_solr}/admin/file?file=/velocity/img/france_640.png&contentType=image/png" id="french_flag" alt="" width="40" height="40"/>Français</a></li>
          <li><a href="#url_for_locale('de_DE')" #if("#current_locale"=="de_DE")class="hidden"#end>
            <img src="#{url_for_solr}/admin/file?file=/velocity/img/germany_640.png&contentType=image/png" id="german_flag" alt="" width="40" height="40"/>Deutsch</a></li>
          <li><a href="#url_for_locale('')" #if("#current_locale"=="")class="hidden"#end>
            <img src="#{url_for_solr}/admin/file?file=/velocity/img/english_640.png&contentType=image/png" id="english_flag" alt="" width="40" height="40"/>English</a></li>
        </ul>
      </li>
    </ul>
  </div>

  #if($response.response.error.code)
    <div class="error">
      <h1>ERROR $response.response.error.code</h1>
      $response.response.error.msg
    </div>
  #else
    <div id="content">
      $content
    </div>
  #end

  <div id="footer">
    #parse("footer.vm")
  </div>
</body>
</html>
| @@ -0,0 +1,16 @@ | |||
## URL-building macros. Each macro emits a fragment of a query string; the
## #fqs, #sort, #q and #url_for_home macros used below are defined elsewhere.
## Emits "?", the fq filters from the response header (when there are any) and the sort parameter.
#macro(lensFilterSortOnly)?#if($response.responseHeader.params.getAll("fq").size() > 0)&#fqs($response.responseHeader.params.getAll("fq"))#end#sort($request.params.getParams('sort'))#end
## Filters and sort plus the current type and, when set, the current locale - but no query.
#macro(lensNoQ)#lensFilterSortOnly&type=#current_type#if("#current_locale"!="")&locale=#current_locale#end#end
## Filters, sort and query plus the current locale when set - but no type.
#macro(lensNoType)#lensFilterSortOnly#q#if("#current_locale"!="")&locale=#current_locale#end#end
## Filters, sort, query and current type - but no locale.
#macro(lensNoLocale)#lensFilterSortOnly#q&type=#current_type#end
## lens modified for example/files - to use fq from responseHeader rather than request, and #debug removed too as it is built into browse params now, also added type to lens
#macro(lens)#lensNoQ#q#end
## Macros defined custom for the "files" example
## Link to the home URL with the given type selected.
#macro(url_for_type $type)#url_for_home#lensNoType&type=$type#end
## The type parameter of the response header, or "all" when it is not set.
#macro(current_type)#if($response.responseHeader.params.type)${response.responseHeader.params.type}#{else}all#end#end
## Link to the home URL with the given locale (omitted when empty), keeping the current page start.
#macro(url_for_locale $locale)#url_for_home#lensNoLocale#if($locale!="")&locale=$locale#end&start=$page.start#end
## The locale parameter of the response header; renders nothing when it is not set.
#macro(current_locale)$!{response.responseHeader.params.locale}#end
## Usage: #label(resource_key[, default_value]) - resource_key is used as label if no default value specified and no resource exists
#macro(label $key $default)#if($resource.get($key).exists)${resource.get($key)}#else#if($default)$default#else${key}#end#end#end
| @@ -0,0 +1,68 @@ | |||
#**
 * Define some Mime-Types, short and long form
 *#
## MimeType to extension map for detecting file type
## and showing proper icon
## The list of types matches the icons in /solr/img/filetypes
## Short MimeType Names: a single string of short names separated by ";"
## Was called $supportedtypes
#set($supportedMimeTypes = "7z;ai;aiff;asc;audio;bin;bz2;c;cfc;cfm;chm;class;conf;cpp;cs;css;csv;deb;divx;doc;dot;eml;enc;file;gif;gz;hlp;htm;html;image;iso;jar;java;jpeg;jpg;js;lua;m;mm;mov;mp3;mpg;odc;odf;odg;odi;odp;ods;odt;ogg;pdf;pgp;php;pl;png;ppt;ps;py;ram;rar;rb;rm;rpm;rtf;sig;sql;swf;sxc;sxd;sxi;sxw;tar;tex;tgz;txt;vcf;video;vsd;wav;wma;wmv;xls;xml;xpi;xvid;zip")
## Long Form: map MimeType headers to our Short names
## (keys are full MIME type strings, values are short names from the list above)
## Was called $extMap
#set( $mimeExtensionsMap = {
"application/x-7z-compressed": "7z",
"application/postscript": "ai",
"application/pgp-signature": "asc",
"application/octet-stream": "bin",
"application/x-bzip2": "bz2",
"text/x-c": "c",
"application/vnd.ms-htmlhelp": "chm",
"application/java-vm": "class",
"text/css": "css",
"text/csv": "csv",
"application/x-debian-package": "deb",
"application/msword": "doc",
"message/rfc822": "eml",
"image/gif": "gif",
"application/winhlp": "hlp",
"text/html": "html",
"application/java-archive": "jar",
"text/x-java-source": "java",
"image/jpeg": "jpeg",
"application/javascript": "js",
"application/vnd.oasis.opendocument.chart": "odc",
"application/vnd.oasis.opendocument.formula": "odf",
"application/vnd.oasis.opendocument.graphics": "odg",
"application/vnd.oasis.opendocument.image": "odi",
"application/vnd.oasis.opendocument.presentation": "odp",
"application/vnd.oasis.opendocument.spreadsheet": "ods",
"application/vnd.oasis.opendocument.text": "odt",
"application/pdf": "pdf",
"application/pgp-encrypted": "pgp",
"image/png": "png",
"application/vnd.ms-powerpoint": "ppt",
"audio/x-pn-realaudio": "ram",
"application/x-rar-compressed": "rar",
"application/vnd.rn-realmedia": "rm",
"application/rtf": "rtf",
"application/x-shockwave-flash": "swf",
"application/vnd.sun.xml.calc": "sxc",
"application/vnd.sun.xml.draw": "sxd",
"application/vnd.sun.xml.impress": "sxi",
"application/vnd.sun.xml.writer": "sxw",
"application/x-tar": "tar",
"application/x-tex": "tex",
"text/plain": "txt",
"text/x-vcard": "vcf",
"application/vnd.visio": "vsd",
"audio/x-wav": "wav",
"audio/x-ms-wma": "wma",
"video/x-ms-wmv": "wmv",
"application/vnd.ms-excel": "xls",
"application/xml": "xml",
"application/x-xpinstall": "xpi",
"application/zip": "zip"
})
| @@ -0,0 +1,20 @@ | |||
## Browse page body: the facets column followed by the result list, which is
## framed by a pagination bar above and below.
<div id="facets">
#parse("facets.vm")
</div>
<div id="results_list">
## top bar: number of results found, query time (QTime) and the current page position
<div class="pagination">
<span class="results-found">$page.results_found</span> $resource.results_found_in.insert(${response.responseHeader.QTime})
$resource.page_of.insert($page.current_page_number,$page.page_count)
</div>
#parse("results_list.vm")
## bottom bar: same counters, wrapped in the previous/next page links
<div class="pagination">
#link_to_previous_page
<span class="results-found">$page.results_found</span> $resource.results_found.
$resource.page_of.insert($page.current_page_number,$page.page_count)
#link_to_next_page
</div>
</div>
| @@ -0,0 +1,21 @@ | |||
## Type tabs: one tab for all types, then one tab per value of the doc_type
## facet, each with its count. The tab of the current type gets class "selected".
<ul id="tabs">
<li><a href="#url_for_type('all')" #if("#current_type"=="all")class="selected"#end>$resource.type.all ($response.response.facet_counts.facet_queries.all_types)</a></li>
#foreach($type in $response.response.facet_counts.facet_fields.doc_type)
#if($type.key)
## NOTE(review): when a type has a count of "0" and is also the current type,
## two class attributes are emitted on the same <a> - confirm this is intended
<li><a href="#url_for_type($type.key)" #if($type.value=="0")class="no_results"#end #if("#current_type"==$type.key)class="selected"#end> #label("type.${type.key}.label", $type.key) ($type.value)</a></li>
#else
## facet entry without a key: shown as "unknown", and only when it has results
#if($type.value > 0)
<li><a href="#url_for_type('unknown')" #if("#current_type"=="unknown")class="selected"#end>$resource.type.unknown ($type.value)</a></li>
#end
#end
#end
</ul>
## one hit.vm rendering per result document
<div id="results">
#foreach($doc in $response.results)
#parse("hit.vm")
#end
</div>
| @@ -42,9 +42,9 @@ Help() | |||
| Import() | |||
| { | |||
| docker exec -it solr solr create_core -c $core | |||
| docker exec -it solr solr create_core -c $core -d custom | |||
| docker exec -ti --user=solr solr bash -c "cp -r /opt/solr/example/files/conf/* /var/solr/data/$core/conf/" | |||
| #docker exec -ti --user=solr solr bash -c "cp -r /opt/solr/example/files/conf/* /var/solr/data/$core/conf/" | |||
| docker restart solr | |||
| @@ -72,7 +72,7 @@ while getopts ":hlimzaes" option; do | |||
| exit;; | |||
| z) # index all | |||
| core="all" | |||
| location="data/2018 (10381)" | |||
| location="data/pop_rtfs" | |||
| Import | |||
| exit;; | |||
| a) # index ACTIVE folder | |||