| - '8983:8983' | - '8983:8983' | ||||
| volumes: | volumes: | ||||
| - solrdata:/var/solr | - solrdata:/var/solr | ||||
| - ./solr_config:/opt/solr/server/solr/configsets/custom | |||||
| volumes: | volumes: | ||||
| solrdata: | solrdata: |
| <br><br> | <br><br> | ||||
| Year: | |||||
| <?php | |||||
| echo $result['year']; | |||||
| ?> | |||||
| <br><br> | |||||
| EPO publication: | EPO publication: | ||||
| <a href=<?php echo $result['epo_publication_url']; ?>> | <a href=<?php echo $result['epo_publication_url']; ?>> |
| <option value="multispecies">multi-species</option> | <option value="multispecies">multi-species</option> | ||||
| <option value="surviving">surviving</option> | <option value="surviving">surviving</option> | ||||
| </select> | </select> | ||||
| sort by: | |||||
| <select name="sort" id="sort"> | |||||
| <option value="relevance">relevance</option> | |||||
| <option value="year">year</option> | |||||
| </select> | |||||
| <input type="submit" id="submit" value="search"> | <input type="submit" id="submit" value="search"> | ||||
| </form> | </form> | ||||
| </div> | </div> |
| <?php | <?php | ||||
| $search_results = solr_search($_POST["search"], $_POST["searchopt"]); | |||||
| $search_results = solr_search($_POST["search"], $_POST["searchopt"], $_POST["sort"]); | |||||
| if(is_array($search_results)): | if(is_array($search_results)): | ||||
| <br><br> | <br><br> | ||||
| Year: | |||||
| <?php | |||||
| echo $result['year']; | |||||
| ?> | |||||
| <br><br> | |||||
| EPO publication: | EPO publication: | ||||
| <a href=<?php echo $result['epo_publication_url']; ?>> | <a href=<?php echo $result['epo_publication_url']; ?>> | ||||
| <br><br> | <br><br> | ||||
| <?php | <?php | ||||
| if ($result['abstract']): | |||||
| if (isset($result['abstract'])): | |||||
| ?> | ?> | ||||
| Abstract: | Abstract: |
| <?php | <?php | ||||
| function solr_search($search, $core){ | |||||
| function solr_search($search, $core, $sort){ | |||||
| // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html | |||||
| $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json'; | |||||
| if ($sort == 'relevance'){ | |||||
| // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html | |||||
| $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json'; | |||||
| } | |||||
| else{ | |||||
| // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html | |||||
| $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json&sort=' . $sort . '%20asc'; | |||||
| } | |||||
| // Perform Curl request on the Solr API | // Perform Curl request on the Solr API | ||||
| $ch = curl_init(); | $ch = curl_init(); | ||||
| elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract)) { | elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract)) { | ||||
| $output['abstract'] = $abstract[1]; | $output['abstract'] = $abstract[1]; | ||||
| } | } | ||||
| // Search for the year in the content element and display it | |||||
| if (preg_match('/=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})/', $input, $year)){ | |||||
| $output['year'] = $year[1]; | |||||
| } | |||||
| return $output; | return $output; | ||||
| } | } | ||||
| <?xml version="1.0" ?> | |||||
| <!-- | |||||
| Licensed to the Apache Software Foundation (ASF) under one or more | |||||
| contributor license agreements. See the NOTICE file distributed with | |||||
| this work for additional information regarding copyright ownership. | |||||
| The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
| (the "License"); you may not use this file except in compliance with | |||||
| the License. You may obtain a copy of the License at | |||||
| http://www.apache.org/licenses/LICENSE-2.0 | |||||
| Unless required by applicable law or agreed to in writing, software | |||||
| distributed under the License is distributed on an "AS IS" BASIS, | |||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| See the License for the specific language governing permissions and | |||||
| limitations under the License. | |||||
| --> | |||||
| <!-- Example exchange rates file for CurrencyField type named "currency" in example schema --> | |||||
| <currencyConfig version="1.0"> | |||||
| <rates> | |||||
| <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 --> | |||||
| <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" /> | |||||
| <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" /> | |||||
| <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" /> | |||||
| <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" /> | |||||
| <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" /> | |||||
| <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" /> | |||||
| <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" /> | |||||
| <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" /> | |||||
| <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" /> | |||||
| <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" /> | |||||
| <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" /> | |||||
| <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" /> | |||||
| <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" /> | |||||
| <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" /> | |||||
| <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" /> | |||||
| <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" /> | |||||
| <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" /> | |||||
| <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" /> | |||||
| <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" /> | |||||
| <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" /> | |||||
| <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" /> | |||||
| <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" /> | |||||
| <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" /> | |||||
| <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" /> | |||||
| <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" /> | |||||
| <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" /> | |||||
| <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" /> | |||||
| <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" /> | |||||
| <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" /> | |||||
| <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" /> | |||||
| <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" /> | |||||
| <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" /> | |||||
| <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" /> | |||||
| <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" /> | |||||
| <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" /> | |||||
| <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" /> | |||||
| <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" /> | |||||
| <!-- Cross-rates for some common currencies --> | |||||
| <rate from="EUR" to="GBP" rate="0.869914" /> | |||||
| <rate from="EUR" to="NOK" rate="7.800095" /> | |||||
| <rate from="GBP" to="NOK" rate="8.966508" /> | |||||
| </rates> | |||||
| </currencyConfig> |
| <?xml version="1.0" encoding="UTF-8" ?> | |||||
| <!-- | |||||
| Licensed to the Apache Software Foundation (ASF) under one or more | |||||
| contributor license agreements. See the NOTICE file distributed with | |||||
| this work for additional information regarding copyright ownership. | |||||
| The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
| (the "License"); you may not use this file except in compliance with | |||||
| the License. You may obtain a copy of the License at | |||||
| http://www.apache.org/licenses/LICENSE-2.0 | |||||
| Unless required by applicable law or agreed to in writing, software | |||||
| distributed under the License is distributed on an "AS IS" BASIS, | |||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| See the License for the specific language governing permissions and | |||||
| limitations under the License. | |||||
| --> | |||||
| <!-- If this file is found in the config directory, it will only be | |||||
| loaded once at startup. If it is found in Solr's data | |||||
| directory, it will be re-loaded every commit. | |||||
| See http://wiki.apache.org/solr/QueryElevationComponent for more info | |||||
| --> | |||||
| <elevate> | |||||
| <!-- Query elevation examples | |||||
| <query text="foo bar"> | |||||
| <doc id="1" /> | |||||
| <doc id="2" /> | |||||
| <doc id="3" /> | |||||
| </query> | |||||
| for use with techproducts example | |||||
| <query text="ipod"> | |||||
| <doc id="MA147LL/A" /> put the actual ipod at the top | |||||
| <doc id="IW-02" exclude="true" /> exclude this cable | |||||
| </query> | |||||
| --> | |||||
| </elevate> |
| <URL> | |||||
| <EMAIL> |
| # Set of Catalan contractions for ElisionFilter | |||||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | |||||
| d | |||||
| l | |||||
| m | |||||
| n | |||||
| s | |||||
| t |
| # Set of French contractions for ElisionFilter | |||||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | |||||
| l | |||||
| m | |||||
| t | |||||
| qu | |||||
| n | |||||
| s | |||||
| j | |||||
| d | |||||
| c | |||||
| jusqu | |||||
| quoiqu | |||||
| lorsqu | |||||
| puisqu |
| # Set of Irish contractions for ElisionFilter | |||||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | |||||
| d | |||||
| m | |||||
| b |
| # Set of Italian contractions for ElisionFilter | |||||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | |||||
| c | |||||
| l | |||||
| all | |||||
| dall | |||||
| dell | |||||
| nell | |||||
| sull | |||||
| coll | |||||
| pell | |||||
| gl | |||||
| agl | |||||
| dagl | |||||
| degl | |||||
| negl | |||||
| sugl | |||||
| un | |||||
| m | |||||
| t | |||||
| s | |||||
| v | |||||
| d |
| # Set of Irish hyphenations for StopFilter | |||||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | |||||
| h | |||||
| n | |||||
| t |
| # Set of overrides for the Dutch stemmer | |||||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | |||||
| fiets fiets | |||||
| bromfiets bromfiets | |||||
| ei eier | |||||
| kind kinder |
| # | |||||
| # This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. | |||||
| # | |||||
| # Any token with a part-of-speech tag that exactly matches one defined in this | |||||
| # file is removed from the token stream. | |||||
| # | |||||
| # Set your own stoptags by uncommenting the lines below. Note that comments are | |||||
| # not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, | |||||
| # etc. that can be useful for building your own stoptag set. | |||||
| # | |||||
| # The entire possible tagset is provided below for convenience. | |||||
| # | |||||
| ##### | |||||
| # noun: unclassified nouns | |||||
| #名詞 | |||||
| # | |||||
| # noun-common: Common nouns or nouns where the sub-classification is undefined | |||||
| #名詞-一般 | |||||
| # | |||||
| # noun-proper: Proper nouns where the sub-classification is undefined | |||||
| #名詞-固有名詞 | |||||
| # | |||||
| # noun-proper-misc: miscellaneous proper nouns | |||||
| #名詞-固有名詞-一般 | |||||
| # | |||||
| # noun-proper-person: Personal names where the sub-classification is undefined | |||||
| #名詞-固有名詞-人名 | |||||
| # | |||||
| # noun-proper-person-misc: names that cannot be divided into surname and | |||||
| # given name; foreign names; names where the surname or given name is unknown. | |||||
| # e.g. お市の方 | |||||
| #名詞-固有名詞-人名-一般 | |||||
| # | |||||
| # noun-proper-person-surname: Mainly Japanese surnames. | |||||
| # e.g. 山田 | |||||
| #名詞-固有名詞-人名-姓 | |||||
| # | |||||
| # noun-proper-person-given_name: Mainly Japanese given names. | |||||
| # e.g. 太郎 | |||||
| #名詞-固有名詞-人名-名 | |||||
| # | |||||
| # noun-proper-organization: Names representing organizations. | |||||
| # e.g. 通産省, NHK | |||||
| #名詞-固有名詞-組織 | |||||
| # | |||||
| # noun-proper-place: Place names where the sub-classification is undefined | |||||
| #名詞-固有名詞-地域 | |||||
| # | |||||
| # noun-proper-place-misc: Place names excluding countries. | |||||
| # e.g. アジア, バルセロナ, 京都 | |||||
| #名詞-固有名詞-地域-一般 | |||||
| # | |||||
| # noun-proper-place-country: Country names. | |||||
| # e.g. 日本, オーストラリア | |||||
| #名詞-固有名詞-地域-国 | |||||
| # | |||||
| # noun-pronoun: Pronouns where the sub-classification is undefined | |||||
| #名詞-代名詞 | |||||
| # | |||||
| # noun-pronoun-misc: miscellaneous pronouns: | |||||
| # e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ | |||||
| #名詞-代名詞-一般 | |||||
| # | |||||
| # noun-pronoun-contraction: Spoken language contraction made by combining a | |||||
| # pronoun and the particle 'wa'. | |||||
| # e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ | |||||
| #名詞-代名詞-縮約 | |||||
| # | |||||
| # noun-adverbial: Temporal nouns such as names of days or months that behave | |||||
| # like adverbs. Nouns that represent amount or ratios and can be used adverbially, | |||||
| # e.g. 金曜, 一月, 午後, 少量 | |||||
| #名詞-副詞可能 | |||||
| # | |||||
| # noun-verbal: Nouns that take arguments with case and can appear followed by | |||||
| # 'suru' and related verbs (する, できる, なさる, くださる) | |||||
| # e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り | |||||
| #名詞-サ変接続 | |||||
| # | |||||
| # noun-adjective-base: The base form of adjectives, words that appear before な ("na") | |||||
| # e.g. 健康, 安易, 駄目, だめ | |||||
| #名詞-形容動詞語幹 | |||||
| # | |||||
| # noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. | |||||
| # e.g. 0, 1, 2, 何, 数, 幾 | |||||
| #名詞-数 | |||||
| # | |||||
| # noun-affix: noun affixes where the sub-classification is undefined | |||||
| #名詞-非自立 | |||||
| # | |||||
| # noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that | |||||
| # attach to the base form of inflectional words, words that cannot be classified | |||||
| # into any of the other categories below. This category includes indefinite nouns. | |||||
| # e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, | |||||
| # 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, | |||||
| # 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, | |||||
| # わり, 割り, 割, ん-口語/, もん-口語/ | |||||
| #名詞-非自立-一般 | |||||
| # | |||||
| # noun-affix-adverbial: noun affixes that can behave as adverbs. | |||||
| # e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, | |||||
| # 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, | |||||
| # 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, | |||||
| # とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, | |||||
| # 儘, 侭, みぎり, 矢先 | |||||
| #名詞-非自立-副詞可能 | |||||
| # | |||||
| # noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars | |||||
| # with the stem よう(だ) ("you(da)"). | |||||
| # e.g. よう, やう, 様 (よう) | |||||
| #名詞-非自立-助動詞語幹 | |||||
| # | |||||
| # noun-affix-adjective-base: noun affixes that can connect to the indeclinable | |||||
| # connection form な (aux "da"). | |||||
| # e.g. みたい, ふう | |||||
| #名詞-非自立-形容動詞語幹 | |||||
| # | |||||
| # noun-special: special nouns where the sub-classification is undefined. | |||||
| #名詞-特殊 | |||||
| # | |||||
| # noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is | |||||
| # treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the base | |||||
| # form of inflectional words. | |||||
| # e.g. そう | |||||
| #名詞-特殊-助動詞語幹 | |||||
| # | |||||
| # noun-suffix: noun suffixes where the sub-classification is undefined. | |||||
| #名詞-接尾 | |||||
| # | |||||
| # noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect | |||||
| # to ガル or タイ and can combine into compound nouns, words that cannot be classified into | |||||
| # any of the other categories below. In general, this category is more inclusive than | |||||
| # 接尾語 ("suffix") and is usually the last element in a compound noun. | |||||
| # e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, | |||||
| # よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 | |||||
| #名詞-接尾-一般 | |||||
| # | |||||
| # noun-suffix-person: Suffixes that form nouns and attach to person names more often | |||||
| # than other nouns. | |||||
| # e.g. 君, 様, 著 | |||||
| #名詞-接尾-人名 | |||||
| # | |||||
| # noun-suffix-place: Suffixes that form nouns and attach to place names more often | |||||
| # than other nouns. | |||||
| # e.g. 町, 市, 県 | |||||
| #名詞-接尾-地域 | |||||
| # | |||||
| # noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that | |||||
| # can appear before スル ("suru"). | |||||
| # e.g. 化, 視, 分け, 入り, 落ち, 買い | |||||
| #名詞-接尾-サ変接続 | |||||
| # | |||||
| # noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, | |||||
| # is treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the | |||||
| # conjunctive form of inflectional words. | |||||
| # e.g. そう | |||||
| #名詞-接尾-助動詞語幹 | |||||
| # | |||||
| # noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive | |||||
| # form of inflectional words and appear before the copula だ ("da"). | |||||
| # e.g. 的, げ, がち | |||||
| #名詞-接尾-形容動詞語幹 | |||||
| # | |||||
| # noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. | |||||
| # e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) | |||||
| #名詞-接尾-副詞可能 | |||||
| # | |||||
| # noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category | |||||
| # is more inclusive than 助数詞 ("classifier") and includes common nouns that attach | |||||
| # to numbers. | |||||
| # e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 | |||||
| #名詞-接尾-助数詞 | |||||
| # | |||||
| # noun-suffix-special: Special suffixes that mainly attach to inflecting words. | |||||
| # e.g. (楽し) さ, (考え) 方 | |||||
| #名詞-接尾-特殊 | |||||
| # | |||||
| # noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words | |||||
| # together. | |||||
| # e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) | |||||
| #名詞-接続詞的 | |||||
| # | |||||
| # noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are | |||||
| # semantically verb-like. | |||||
| # e.g. ごらん, ご覧, 御覧, 頂戴 | |||||
| #名詞-動詞非自立的 | |||||
| # | |||||
| # noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, | |||||
| # dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") | |||||
| # is いわく ("iwaku"). | |||||
| #名詞-引用文字列 | |||||
| # | |||||
| # noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and | |||||
| # behave like an adjective. | |||||
| # e.g. 申し訳, 仕方, とんでも, 違い | |||||
| #名詞-ナイ形容詞語幹 | |||||
| # | |||||
| ##### | |||||
| # prefix: unclassified prefixes | |||||
| #接頭詞 | |||||
| # | |||||
| # prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) | |||||
| # excluding numerical expressions. | |||||
| # e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) | |||||
| #接頭詞-名詞接続 | |||||
| # | |||||
| # prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb | |||||
| # in conjunctive form followed by なる/なさる/くださる. | |||||
| # e.g. お (読みなさい), お (座り) | |||||
| #接頭詞-動詞接続 | |||||
| # | |||||
| # prefix-adjectival: Prefixes that attach to adjectives. | |||||
| # e.g. お (寒いですねえ), バカ (でかい) | |||||
| #接頭詞-形容詞接続 | |||||
| # | |||||
| # prefix-numerical: Prefixes that attach to numerical expressions. | |||||
| # e.g. 約, およそ, 毎時 | |||||
| #接頭詞-数接続 | |||||
| # | |||||
| ##### | |||||
| # verb: unclassified verbs | |||||
| #動詞 | |||||
| # | |||||
| # verb-main: | |||||
| #動詞-自立 | |||||
| # | |||||
| # verb-auxiliary: | |||||
| #動詞-非自立 | |||||
| # | |||||
| # verb-suffix: | |||||
| #動詞-接尾 | |||||
| # | |||||
| ##### | |||||
| # adjective: unclassified adjectives | |||||
| #形容詞 | |||||
| # | |||||
| # adjective-main: | |||||
| #形容詞-自立 | |||||
| # | |||||
| # adjective-auxiliary: | |||||
| #形容詞-非自立 | |||||
| # | |||||
| # adjective-suffix: | |||||
| #形容詞-接尾 | |||||
| # | |||||
| ##### | |||||
| # adverb: unclassified adverbs | |||||
| #副詞 | |||||
| # | |||||
| # adverb-misc: Words that can be segmented into one unit and where adnominal | |||||
| # modification is not possible. | |||||
| # e.g. あいかわらず, 多分 | |||||
| #副詞-一般 | |||||
| # | |||||
| # adverb-particle_conjunction: Adverbs that can be followed by の, は, に, | |||||
| # な, する, だ, etc. | |||||
| # e.g. こんなに, そんなに, あんなに, なにか, なんでも | |||||
| #副詞-助詞類接続 | |||||
| # | |||||
| ##### | |||||
| # adnominal: Words that only have noun-modifying forms. | |||||
| # e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, | |||||
| # どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, | |||||
| # 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き | |||||
| #連体詞 | |||||
| # | |||||
| ##### | |||||
| # conjunction: Conjunctions that can occur independently. | |||||
| # e.g. が, けれども, そして, じゃあ, それどころか | |||||
| 接続詞 | |||||
| # | |||||
| ##### | |||||
| # particle: unclassified particles. | |||||
| 助詞 | |||||
| # | |||||
| # particle-case: case particles where the subclassification is undefined. | |||||
| 助詞-格助詞 | |||||
| # | |||||
| # particle-case-misc: Case particles. | |||||
| # e.g. から, が, で, と, に, へ, より, を, の, にて | |||||
| 助詞-格助詞-一般 | |||||
| # | |||||
| # particle-case-quote: the "to" that appears after nouns, a person’s speech, | |||||
| # quotation marks, expressions of decisions from a meeting, reasons, judgements, | |||||
| # conjectures, etc. | |||||
| # e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) | |||||
| 助詞-格助詞-引用 | |||||
| # | |||||
| # particle-case-compound: Compounds of particles and verbs that mainly behave | |||||
| # like case particles. | |||||
| # e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, | |||||
| # にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, | |||||
| # にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, | |||||
| # に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, | |||||
| # に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, | |||||
| # にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, | |||||
| # にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, | |||||
| # って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ | |||||
| 助詞-格助詞-連語 | |||||
| # | |||||
| # particle-conjunctive: | |||||
| # e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, | |||||
| # ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, | |||||
| # (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ | |||||
| 助詞-接続助詞 | |||||
| # | |||||
| # particle-dependency: | |||||
| # e.g. こそ, さえ, しか, すら, は, も, ぞ | |||||
| 助詞-係助詞 | |||||
| # | |||||
| # particle-adverbial: | |||||
| # e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, | |||||
| # (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, | |||||
| # (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, | |||||
| # (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, | |||||
| # ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) | |||||
| 助詞-副助詞 | |||||
| # | |||||
| # particle-interjective: particles with interjective grammatical roles. | |||||
| # e.g. (松島) や | |||||
| 助詞-間投助詞 | |||||
| # | |||||
| # particle-coordinate: | |||||
| # e.g. と, たり, だの, だり, とか, なり, や, やら | |||||
| 助詞-並立助詞 | |||||
| # | |||||
| # particle-final: | |||||
| # e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, | |||||
| # ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ | |||||
| 助詞-終助詞 | |||||
| # | |||||
| # particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is | |||||
| # adverbial, conjunctive, or sentence final. For example: | |||||
| # (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 | |||||
| # (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 | |||||
| # 「(祈りが届いたせい) か (, 試験に合格した.)」 | |||||
| # (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 | |||||
| # e.g. か | |||||
| 助詞-副助詞/並立助詞/終助詞 | |||||
| # | |||||
| # particle-adnominalizer: The "no" that attaches to nouns and modifies | |||||
| # non-inflectional words. | |||||
| 助詞-連体化 | |||||
| # | |||||
| # particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs | |||||
| # that are giongo, giseigo, or gitaigo. | |||||
| # e.g. に, と | |||||
| 助詞-副詞化 | |||||
| # | |||||
| # particle-special: A particle that does not fit into one of the above classifications. | |||||
| # This includes particles that are used in Tanka, Haiku, and other poetry. | |||||
| # e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) | |||||
| 助詞-特殊 | |||||
| # | |||||
| ##### | |||||
| # auxiliary-verb: | |||||
| 助動詞 | |||||
| # | |||||
| ##### | |||||
| # interjection: Greetings and other exclamations. | |||||
| # e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, | |||||
| # いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい | |||||
| #感動詞 | |||||
| # | |||||
| ##### | |||||
| # symbol: unclassified Symbols. | |||||
| 記号 | |||||
| # | |||||
| # symbol-misc: A general symbol not in one of the categories below. | |||||
| # e.g. [○◎@$〒→+] | |||||
| 記号-一般 | |||||
| # | |||||
| # symbol-comma: Commas | |||||
| # e.g. [,、] | |||||
| 記号-読点 | |||||
| # | |||||
| # symbol-period: Periods and full stops. | |||||
| # e.g. [..。] | |||||
| 記号-句点 | |||||
| # | |||||
| # symbol-space: Full-width whitespace. | |||||
| 記号-空白 | |||||
| # | |||||
| # symbol-open_bracket: | |||||
| # e.g. [({‘“『【] | |||||
| 記号-括弧開 | |||||
| # | |||||
| # symbol-close_bracket: | |||||
| # e.g. [)}’”』」】] | |||||
| 記号-括弧閉 | |||||
| # | |||||
| # symbol-alphabetic: | |||||
| #記号-アルファベット | |||||
| # | |||||
| ##### | |||||
| # other: unclassified other | |||||
| #その他 | |||||
| # | |||||
| # other-interjection: Words that are hard to classify as noun-suffixes or | |||||
| # sentence-final particles. | |||||
| # e.g. (だ)ァ | |||||
| その他-間投 | |||||
| # | |||||
| ##### | |||||
| # filler: Aizuchi that occurs during a conversation or sounds inserted as filler. | |||||
| # e.g. あの, うんと, えと | |||||
| フィラー | |||||
| # | |||||
| ##### | |||||
| # non-verbal: non-verbal sound. | |||||
| 非言語音 | |||||
| # | |||||
| ##### | |||||
| # fragment: | |||||
| #語断片 | |||||
| # | |||||
| ##### | |||||
| # unknown: unknown part of speech. | |||||
| #未知語 | |||||
| # | |||||
| ##### End of file |
| # This file was created by Jacques Savoy and is distributed under the BSD license. | |||||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | |||||
| # Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| # Cleaned on October 11, 2009 (not normalized, so use before normalization) | |||||
| # This means that when modifying this list, you might need to add some | |||||
| # redundant entries, for example containing forms with both أ and ا | |||||
| من | |||||
| ومن | |||||
| منها | |||||
| منه | |||||
| في | |||||
| وفي | |||||
| فيها | |||||
| فيه | |||||
| و | |||||
| ف | |||||
| ثم | |||||
| او | |||||
| أو | |||||
| ب | |||||
| بها | |||||
| به | |||||
| ا | |||||
| أ | |||||
| اى | |||||
| اي | |||||
| أي | |||||
| أى | |||||
| لا | |||||
| ولا | |||||
| الا | |||||
| ألا | |||||
| إلا | |||||
| لكن | |||||
| ما | |||||
| وما | |||||
| كما | |||||
| فما | |||||
| عن | |||||
| مع | |||||
| اذا | |||||
| إذا | |||||
| ان | |||||
| أن | |||||
| إن | |||||
| انها | |||||
| أنها | |||||
| إنها | |||||
| انه | |||||
| أنه | |||||
| إنه | |||||
| بان | |||||
| بأن | |||||
| فان | |||||
| فأن | |||||
| وان | |||||
| وأن | |||||
| وإن | |||||
| التى | |||||
| التي | |||||
| الذى | |||||
| الذي | |||||
| الذين | |||||
| الى | |||||
| الي | |||||
| إلى | |||||
| إلي | |||||
| على | |||||
| عليها | |||||
| عليه | |||||
| اما | |||||
| أما | |||||
| إما | |||||
| ايضا | |||||
| أيضا | |||||
| كل | |||||
| وكل | |||||
| لم | |||||
| ولم | |||||
| لن | |||||
| ولن | |||||
| هى | |||||
| هي | |||||
| هو | |||||
| وهى | |||||
| وهي | |||||
| وهو | |||||
| فهى | |||||
| فهي | |||||
| فهو | |||||
| انت | |||||
| أنت | |||||
| لك | |||||
| لها | |||||
| له | |||||
| هذه | |||||
| هذا | |||||
| تلك | |||||
| ذلك | |||||
| هناك | |||||
| كانت | |||||
| كان | |||||
| يكون | |||||
| تكون | |||||
| وكانت | |||||
| وكان | |||||
| غير | |||||
| بعض | |||||
| قد | |||||
| نحو | |||||
| بين | |||||
| بينما | |||||
| منذ | |||||
| ضمن | |||||
| حيث | |||||
| الان | |||||
| الآن | |||||
| خلال | |||||
| بعد | |||||
| قبل | |||||
| حتى | |||||
| عند | |||||
| عندما | |||||
| لدى | |||||
| جميع |
| # This file was created by Jacques Savoy and is distributed under the BSD license. | |||||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | |||||
| # Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| а | |||||
| аз | |||||
| ако | |||||
| ала | |||||
| бе | |||||
| без | |||||
| беше | |||||
| би | |||||
| бил | |||||
| била | |||||
| били | |||||
| било | |||||
| близо | |||||
| бъдат | |||||
| бъде | |||||
| бяха | |||||
| в | |||||
| вас | |||||
| ваш | |||||
| ваша | |||||
| вероятно | |||||
| вече | |||||
| взема | |||||
| ви | |||||
| вие | |||||
| винаги | |||||
| все | |||||
| всеки | |||||
| всички | |||||
| всичко | |||||
| всяка | |||||
| във | |||||
| въпреки | |||||
| върху | |||||
| г | |||||
| ги | |||||
| главно | |||||
| го | |||||
| д | |||||
| да | |||||
| дали | |||||
| до | |||||
| докато | |||||
| докога | |||||
| дори | |||||
| досега | |||||
| доста | |||||
| е | |||||
| едва | |||||
| един | |||||
| ето | |||||
| за | |||||
| зад | |||||
| заедно | |||||
| заради | |||||
| засега | |||||
| затова | |||||
| защо | |||||
| защото | |||||
| и | |||||
| из | |||||
| или | |||||
| им | |||||
| има | |||||
| имат | |||||
| иска | |||||
| й | |||||
| каза | |||||
| как | |||||
| каква | |||||
| какво | |||||
| както | |||||
| какъв | |||||
| като | |||||
| кога | |||||
| когато | |||||
| което | |||||
| които | |||||
| кой | |||||
| който | |||||
| колко | |||||
| която | |||||
| къде | |||||
| където | |||||
| към | |||||
| ли | |||||
| м | |||||
| ме | |||||
| между | |||||
| мен | |||||
| ми | |||||
| мнозина | |||||
| мога | |||||
| могат | |||||
| може | |||||
| моля | |||||
| момента | |||||
| му | |||||
| н | |||||
| на | |||||
| над | |||||
| назад | |||||
| най | |||||
| направи | |||||
| напред | |||||
| например | |||||
| нас | |||||
| не | |||||
| него | |||||
| нея | |||||
| ни | |||||
| ние | |||||
| никой | |||||
| нито | |||||
| но | |||||
| някои | |||||
| някой | |||||
| няма | |||||
| обаче | |||||
| около | |||||
| освен | |||||
| особено | |||||
| от | |||||
| отгоре | |||||
| отново | |||||
| още | |||||
| пак | |||||
| по | |||||
| повече | |||||
| повечето | |||||
| под | |||||
| поне | |||||
| поради | |||||
| после | |||||
| почти | |||||
| прави | |||||
| пред | |||||
| преди | |||||
| през | |||||
| при | |||||
| пък | |||||
| първо | |||||
| с | |||||
| са | |||||
| само | |||||
| се | |||||
| сега | |||||
| си | |||||
| скоро | |||||
| след | |||||
| сме | |||||
| според | |||||
| сред | |||||
| срещу | |||||
| сте | |||||
| съм | |||||
| със | |||||
| също | |||||
| т | |||||
| тази | |||||
| така | |||||
| такива | |||||
| такъв | |||||
| там | |||||
| твой | |||||
| те | |||||
| тези | |||||
| ти | |||||
| тн | |||||
| то | |||||
| това | |||||
| тогава | |||||
| този | |||||
| той | |||||
| толкова | |||||
| точно | |||||
| трябва | |||||
| тук | |||||
| тъй | |||||
| тя | |||||
| тях | |||||
| у | |||||
| харесва | |||||
| ч | |||||
| че | |||||
| често | |||||
| чрез | |||||
| ще | |||||
| щом | |||||
| я |
| # Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) | |||||
| a | |||||
| abans | |||||
| ací | |||||
| ah | |||||
| així | |||||
| això | |||||
| al | |||||
| als | |||||
| aleshores | |||||
| algun | |||||
| alguna | |||||
| algunes | |||||
| alguns | |||||
| alhora | |||||
| allà | |||||
| allí | |||||
| allò | |||||
| altra | |||||
| altre | |||||
| altres | |||||
| amb | |||||
| ambdós | |||||
| ambdues | |||||
| apa | |||||
| aquell | |||||
| aquella | |||||
| aquelles | |||||
| aquells | |||||
| aquest | |||||
| aquesta | |||||
| aquestes | |||||
| aquests | |||||
| aquí | |||||
| baix | |||||
| cada | |||||
| cadascú | |||||
| cadascuna | |||||
| cadascunes | |||||
| cadascuns | |||||
| com | |||||
| contra | |||||
| d'un | |||||
| d'una | |||||
| d'unes | |||||
| d'uns | |||||
| dalt | |||||
| de | |||||
| del | |||||
| dels | |||||
| des | |||||
| després | |||||
| dins | |||||
| dintre | |||||
| donat | |||||
| doncs | |||||
| durant | |||||
| e | |||||
| eh | |||||
| el | |||||
| els | |||||
| em | |||||
| en | |||||
| encara | |||||
| ens | |||||
| entre | |||||
| érem | |||||
| eren | |||||
| éreu | |||||
| es | |||||
| és | |||||
| esta | |||||
| està | |||||
| estàvem | |||||
| estaven | |||||
| estàveu | |||||
| esteu | |||||
| et | |||||
| etc | |||||
| ets | |||||
| fins | |||||
| fora | |||||
| gairebé | |||||
| ha | |||||
| han | |||||
| has | |||||
| havia | |||||
| he | |||||
| hem | |||||
| heu | |||||
| hi | |||||
| ho | |||||
| i | |||||
| igual | |||||
| iguals | |||||
| ja | |||||
| l'hi | |||||
| la | |||||
| les | |||||
| li | |||||
| li'n | |||||
| llavors | |||||
| m'he | |||||
| ma | |||||
| mal | |||||
| malgrat | |||||
| mateix | |||||
| mateixa | |||||
| mateixes | |||||
| mateixos | |||||
| me | |||||
| mentre | |||||
| més | |||||
| meu | |||||
| meus | |||||
| meva | |||||
| meves | |||||
| molt | |||||
| molta | |||||
| moltes | |||||
| molts | |||||
| mon | |||||
| mons | |||||
| n'he | |||||
| n'hi | |||||
| ne | |||||
| ni | |||||
| no | |||||
| nogensmenys | |||||
| només | |||||
| nosaltres | |||||
| nostra | |||||
| nostre | |||||
| nostres | |||||
| o | |||||
| oh | |||||
| oi | |||||
| on | |||||
| pas | |||||
| pel | |||||
| pels | |||||
| per | |||||
| però | |||||
| perquè | |||||
| poc | |||||
| poca | |||||
| pocs | |||||
| poques | |||||
| potser | |||||
| propi | |||||
| qual | |||||
| quals | |||||
| quan | |||||
| quant | |||||
| que | |||||
| què | |||||
| quelcom | |||||
| qui | |||||
| quin | |||||
| quina | |||||
| quines | |||||
| quins | |||||
| s'ha | |||||
| s'han | |||||
| sa | |||||
| semblant | |||||
| semblants | |||||
| ses | |||||
| seu | |||||
| seus | |||||
| seva | |||||
| seva | |||||
| seves | |||||
| si | |||||
| sobre | |||||
| sobretot | |||||
| sóc | |||||
| solament | |||||
| sols | |||||
| son | |||||
| són | |||||
| sons | |||||
| sota | |||||
| sou | |||||
| t'ha | |||||
| t'han | |||||
| t'he | |||||
| ta | |||||
| tal | |||||
| també | |||||
| tampoc | |||||
| tan | |||||
| tant | |||||
| tanta | |||||
| tantes | |||||
| teu | |||||
| teus | |||||
| teva | |||||
| teves | |||||
| ton | |||||
| tons | |||||
| tot | |||||
| tota | |||||
| totes | |||||
| tots | |||||
| un | |||||
| una | |||||
| unes | |||||
| uns | |||||
| us | |||||
| va | |||||
| vaig | |||||
| vam | |||||
| van | |||||
| vas | |||||
| veu | |||||
| vosaltres | |||||
| vostra | |||||
| vostre | |||||
| vostres |
| a | |||||
| s | |||||
| k | |||||
| o | |||||
| i | |||||
| u | |||||
| v | |||||
| z | |||||
| dnes | |||||
| cz | |||||
| tímto | |||||
| budeš | |||||
| budem | |||||
| byli | |||||
| jseš | |||||
| můj | |||||
| svým | |||||
| ta | |||||
| tomto | |||||
| tohle | |||||
| tuto | |||||
| tyto | |||||
| jej | |||||
| zda | |||||
| proč | |||||
| máte | |||||
| tato | |||||
| kam | |||||
| tohoto | |||||
| kdo | |||||
| kteří | |||||
| mi | |||||
| nám | |||||
| tom | |||||
| tomuto | |||||
| mít | |||||
| nic | |||||
| proto | |||||
| kterou | |||||
| byla | |||||
| toho | |||||
| protože | |||||
| asi | |||||
| ho | |||||
| naši | |||||
| napište | |||||
| re | |||||
| což | |||||
| tím | |||||
| takže | |||||
| svých | |||||
| její | |||||
| svými | |||||
| jste | |||||
| aj | |||||
| tu | |||||
| tedy | |||||
| teto | |||||
| bylo | |||||
| kde | |||||
| ke | |||||
| pravé | |||||
| ji | |||||
| nad | |||||
| nejsou | |||||
| či | |||||
| pod | |||||
| téma | |||||
| mezi | |||||
| přes | |||||
| ty | |||||
| pak | |||||
| vám | |||||
| ani | |||||
| když | |||||
| však | |||||
| neg | |||||
| jsem | |||||
| tento | |||||
| článku | |||||
| články | |||||
| aby | |||||
| jsme | |||||
| před | |||||
| pta | |||||
| jejich | |||||
| byl | |||||
| ještě | |||||
| až | |||||
| bez | |||||
| také | |||||
| pouze | |||||
| první | |||||
| vaše | |||||
| která | |||||
| nás | |||||
| nový | |||||
| tipy | |||||
| pokud | |||||
| může | |||||
| strana | |||||
| jeho | |||||
| své | |||||
| jiné | |||||
| zprávy | |||||
| nové | |||||
| není | |||||
| vás | |||||
| jen | |||||
| podle | |||||
| zde | |||||
| už | |||||
| být | |||||
| více | |||||
| bude | |||||
| již | |||||
| než | |||||
| který | |||||
| by | |||||
| které | |||||
| co | |||||
| nebo | |||||
| ten | |||||
| tak | |||||
| má | |||||
| při | |||||
| od | |||||
| po | |||||
| jsou | |||||
| jak | |||||
| další | |||||
| ale | |||||
| si | |||||
| se | |||||
| ve | |||||
| to | |||||
| jako | |||||
| za | |||||
| zpět | |||||
| ze | |||||
| do | |||||
| pro | |||||
| je | |||||
| na | |||||
| atd | |||||
| atp | |||||
| jakmile | |||||
| přičemž | |||||
| já | |||||
| on | |||||
| ona | |||||
| ono | |||||
| oni | |||||
| ony | |||||
| my | |||||
| vy | |||||
| jí | |||||
| ji | |||||
| mě | |||||
| mne | |||||
| jemu | |||||
| tomu | |||||
| těm | |||||
| těmu | |||||
| němu | |||||
| němuž | |||||
| jehož | |||||
| jíž | |||||
| jelikož | |||||
| jež | |||||
| jakož | |||||
| načež |
| | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt | |||||
| | This file is distributed under the BSD License. | |||||
| | See http://snowball.tartarus.org/license.php | |||||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| | - Encoding was converted to UTF-8. | |||||
| | - This notice was added. | |||||
| | | |||||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| | A Danish stop word list. Comments begin with vertical bar. Each stop | |||||
| | word is at the start of a line. | |||||
| | This is a ranked list (commonest to rarest) of stopwords derived from | |||||
| | a large text sample. | |||||
| og | and | |||||
| i | in | |||||
| jeg | I | |||||
| det | that (dem. pronoun)/it (pers. pronoun) | |||||
| at | that (in front of a sentence)/to (with infinitive) | |||||
| en | a/an | |||||
| den | it (pers. pronoun)/that (dem. pronoun) | |||||
| til | to/at/for/until/against/by/of/into, more | |||||
| er | present tense of "to be" | |||||
| som | who, as | |||||
| på | on/upon/in/on/at/to/after/of/with/for, on | |||||
| de | they | |||||
| med | with/by/in, along | |||||
| han | he | |||||
| af | of/by/from/off/for/in/with/on, off | |||||
| for | at/for/to/from/by/of/ago, in front/before, because | |||||
| ikke | not | |||||
| der | who/which, there/those | |||||
| var | past tense of "to be" | |||||
| mig | me/myself | |||||
| sig | oneself/himself/herself/itself/themselves | |||||
| men | but | |||||
| et | a/an/one, one (number), someone/somebody/one | |||||
| har | present tense of "to have" | |||||
| om | round/about/for/in/a, about/around/down, if | |||||
| vi | we | |||||
| min | my | |||||
| havde | past tense of "to have" | |||||
| ham | him | |||||
| hun | she | |||||
| nu | now | |||||
| over | over/above/across/by/beyond/past/on/about, over/past | |||||
| da | then, when/as/since | |||||
| fra | from/off/since, off, since | |||||
| du | you | |||||
| ud | out | |||||
| sin | his/her/its/one's | |||||
| dem | them | |||||
| os | us/ourselves | |||||
| op | up | |||||
| man | you/one | |||||
| hans | his | |||||
| hvor | where | |||||
| eller | or | |||||
| hvad | what | |||||
| skal | must/shall etc. | |||||
| selv | myself/youself/herself/ourselves etc., even | |||||
| her | here | |||||
| alle | all/everyone/everybody etc. | |||||
| vil | will (verb) | |||||
| blev | past tense of "to stay/to remain/to get/to become" | |||||
| kunne | could | |||||
| ind | in | |||||
| når | when | |||||
| være | present tense of "to be" | |||||
| dog | however/yet/after all | |||||
| noget | something | |||||
| ville | would | |||||
| jo | you know/you see (adv), yes | |||||
| deres | their/theirs | |||||
| efter | after/behind/according to/for/by/from, later/afterwards | |||||
| ned | down | |||||
| skulle | should | |||||
| denne | this | |||||
| end | than | |||||
| dette | this | |||||
| mit | my/mine | |||||
| også | also | |||||
| under | under/beneath/below/during, below/underneath | |||||
| have | have | |||||
| dig | you | |||||
| anden | other | |||||
| hende | her | |||||
| mine | my | |||||
| alt | everything | |||||
| meget | much/very, plenty of | |||||
| sit | his, her, its, one's | |||||
| sine | his, her, its, one's | |||||
| vor | our | |||||
| mod | against | |||||
| disse | these | |||||
| hvis | if | |||||
| din | your/yours | |||||
| nogle | some | |||||
| hos | by/at | |||||
| blive | be/become | |||||
| mange | many | |||||
| ad | by/through | |||||
| bliver | present tense of "to be/to become" | |||||
| hendes | her/hers | |||||
| været | be | |||||
| thi | for (conj) | |||||
| jer | you | |||||
| sådan | such, like this/like that |
| | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt | |||||
| | This file is distributed under the BSD License. | |||||
| | See http://snowball.tartarus.org/license.php | |||||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| | - Encoding was converted to UTF-8. | |||||
| | - This notice was added. | |||||
| | | |||||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| | A German stop word list. Comments begin with vertical bar. Each stop | |||||
| | word is at the start of a line. | |||||
| | The number of forms in this list is reduced significantly by passing it | |||||
| | through the German stemmer. | |||||
| aber | but | |||||
| alle | all | |||||
| allem | |||||
| allen | |||||
| aller | |||||
| alles | |||||
| als | than, as | |||||
| also | so | |||||
| am | an + dem | |||||
| an | at | |||||
| ander | other | |||||
| andere | |||||
| anderem | |||||
| anderen | |||||
| anderer | |||||
| anderes | |||||
| anderm | |||||
| andern | |||||
| anderr | |||||
| anders | |||||
| auch | also | |||||
| auf | on | |||||
| aus | out of | |||||
| bei | by | |||||
| bin | am | |||||
| bis | until | |||||
| bist | art | |||||
| da | there | |||||
| damit | with it | |||||
| dann | then | |||||
| der | the | |||||
| den | |||||
| des | |||||
| dem | |||||
| die | |||||
| das | |||||
| daß | that | |||||
| derselbe | the same | |||||
| derselben | |||||
| denselben | |||||
| desselben | |||||
| demselben | |||||
| dieselbe | |||||
| dieselben | |||||
| dasselbe | |||||
| dazu | to that | |||||
| dein | thy | |||||
| deine | |||||
| deinem | |||||
| deinen | |||||
| deiner | |||||
| deines | |||||
| denn | because | |||||
| derer | of those | |||||
| dessen | of him | |||||
| dich | thee | |||||
| dir | to thee | |||||
| du | thou | |||||
| dies | this | |||||
| diese | |||||
| diesem | |||||
| diesen | |||||
| dieser | |||||
| dieses | |||||
| doch | (several meanings) | |||||
| dort | (over) there | |||||
| durch | through | |||||
| ein | a | |||||
| eine | |||||
| einem | |||||
| einen | |||||
| einer | |||||
| eines | |||||
| einig | some | |||||
| einige | |||||
| einigem | |||||
| einigen | |||||
| einiger | |||||
| einiges | |||||
| einmal | once | |||||
| er | he | |||||
| ihn | him | |||||
| ihm | to him | |||||
| es | it | |||||
| etwas | something | |||||
| euer | your | |||||
| eure | |||||
| eurem | |||||
| euren | |||||
| eurer | |||||
| eures | |||||
| für | for | |||||
| gegen | towards | |||||
| gewesen | p.p. of sein | |||||
| hab | have | |||||
| habe | have | |||||
| haben | have | |||||
| hat | has | |||||
| hatte | had | |||||
| hatten | had | |||||
| hier | here | |||||
| hin | there | |||||
| hinter | behind | |||||
| ich | I | |||||
| mich | me | |||||
| mir | to me | |||||
| ihr | you, to her | |||||
| ihre | |||||
| ihrem | |||||
| ihren | |||||
| ihrer | |||||
| ihres | |||||
| euch | to you | |||||
| im | in + dem | |||||
| in | in | |||||
| indem | while | |||||
| ins | in + das | |||||
| ist | is | |||||
| jede | each, every | |||||
| jedem | |||||
| jeden | |||||
| jeder | |||||
| jedes | |||||
| jene | that | |||||
| jenem | |||||
| jenen | |||||
| jener | |||||
| jenes | |||||
| jetzt | now | |||||
| kann | can | |||||
| kein | no | |||||
| keine | |||||
| keinem | |||||
| keinen | |||||
| keiner | |||||
| keines | |||||
| können | can | |||||
| könnte | could | |||||
| machen | do | |||||
| man | one | |||||
| manche | some, many a | |||||
| manchem | |||||
| manchen | |||||
| mancher | |||||
| manches | |||||
| mein | my | |||||
| meine | |||||
| meinem | |||||
| meinen | |||||
| meiner | |||||
| meines | |||||
| mit | with | |||||
| muss | must | |||||
| musste | had to | |||||
| nach | to(wards) | |||||
| nicht | not | |||||
| nichts | nothing | |||||
| noch | still, yet | |||||
| nun | now | |||||
| nur | only | |||||
| ob | whether | |||||
| oder | or | |||||
| ohne | without | |||||
| sehr | very | |||||
| sein | his | |||||
| seine | |||||
| seinem | |||||
| seinen | |||||
| seiner | |||||
| seines | |||||
| selbst | self | |||||
| sich | herself | |||||
| sie | they, she | |||||
| ihnen | to them | |||||
| sind | are | |||||
| so | so | |||||
| solche | such | |||||
| solchem | |||||
| solchen | |||||
| solcher | |||||
| solches | |||||
| soll | shall | |||||
| sollte | should | |||||
| sondern | but | |||||
| sonst | else | |||||
| über | over | |||||
| um | about, around | |||||
| und | and | |||||
| uns | us | |||||
| unse | |||||
| unsem | |||||
| unsen | |||||
| unser | |||||
| unses | |||||
| unter | under | |||||
| viel | much | |||||
| vom | von + dem | |||||
| von | from | |||||
| vor | before | |||||
| während | while | |||||
| war | was | |||||
| waren | were | |||||
| warst | wast | |||||
| was | what | |||||
| weg | away, off | |||||
| weil | because | |||||
| weiter | further | |||||
| welche | which | |||||
| welchem | |||||
| welchen | |||||
| welcher | |||||
| welches | |||||
| wenn | when | |||||
| werde | will | |||||
| werden | will | |||||
| wie | how | |||||
| wieder | again | |||||
| will | want | |||||
| wir | we | |||||
| wird | will | |||||
| wirst | willst | |||||
| wo | where | |||||
| wollen | want | |||||
| wollte | wanted | |||||
| würde | would | |||||
| würden | would | |||||
| zu | to | |||||
| zum | zu + dem | |||||
| zur | zu + der | |||||
| zwar | indeed | |||||
| zwischen | between | |||||
| # Lucene Greek Stopwords list | |||||
| # Note: by default this file is used after GreekLowerCaseFilter, | |||||
| # so when modifying this file use 'σ' instead of 'ς' | |||||
| ο | |||||
| η | |||||
| το | |||||
| οι | |||||
| τα | |||||
| του | |||||
| τησ | |||||
| των | |||||
| τον | |||||
| την | |||||
| και | |||||
| κι | |||||
| κ | |||||
| ειμαι | |||||
| εισαι | |||||
| ειναι | |||||
| ειμαστε | |||||
| ειστε | |||||
| στο | |||||
| στον | |||||
| στη | |||||
| στην | |||||
| μα | |||||
| αλλα | |||||
| απο | |||||
| για | |||||
| προσ | |||||
| με | |||||
| σε | |||||
| ωσ | |||||
| παρα | |||||
| αντι | |||||
| κατα | |||||
| μετα | |||||
| θα | |||||
| να | |||||
| δε | |||||
| δεν | |||||
| μη | |||||
| μην | |||||
| επι | |||||
| ενω | |||||
| εαν | |||||
| αν | |||||
| τοτε | |||||
| που | |||||
| πωσ | |||||
| ποιοσ | |||||
| ποια | |||||
| ποιο | |||||
| ποιοι | |||||
| ποιεσ | |||||
| ποιων | |||||
| ποιουσ | |||||
| αυτοσ | |||||
| αυτη | |||||
| αυτο | |||||
| αυτοι | |||||
| αυτων | |||||
| αυτουσ | |||||
| αυτεσ | |||||
| αυτα | |||||
| εκεινοσ | |||||
| εκεινη | |||||
| εκεινο | |||||
| εκεινοι | |||||
| εκεινεσ | |||||
| εκεινα | |||||
| εκεινων | |||||
| εκεινουσ | |||||
| οπωσ | |||||
| ομωσ | |||||
| ισωσ | |||||
| οσο | |||||
| οτι |
| # Licensed to the Apache Software Foundation (ASF) under one or more | |||||
| # contributor license agreements. See the NOTICE file distributed with | |||||
| # this work for additional information regarding copyright ownership. | |||||
| # The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
| # (the "License"); you may not use this file except in compliance with | |||||
| # the License. You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # a couple of test stopwords to test that the words are really being | |||||
| # configured from this file: | |||||
| stopworda | |||||
| stopwordb | |||||
| # Standard english stop words taken from Lucene's StopAnalyzer | |||||
| a | |||||
| an | |||||
| and | |||||
| are | |||||
| as | |||||
| at | |||||
| be | |||||
| but | |||||
| by | |||||
| for | |||||
| if | |||||
| in | |||||
| into | |||||
| is | |||||
| it | |||||
| no | |||||
| not | |||||
| of | |||||
| on | |||||
| or | |||||
| such | |||||
| that | |||||
| the | |||||
| their | |||||
| then | |||||
| there | |||||
| these | |||||
| they | |||||
| this | |||||
| to | |||||
| was | |||||
| will | |||||
| with |
| | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt | |||||
| | This file is distributed under the BSD License. | |||||
| | See http://snowball.tartarus.org/license.php | |||||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| | - Encoding was converted to UTF-8. | |||||
| | - This notice was added. | |||||
| | | |||||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| | A Spanish stop word list. Comments begin with vertical bar. Each stop | |||||
| | word is at the start of a line. | |||||
| | The following is a ranked list (commonest to rarest) of stopwords | |||||
| | deriving from a large sample of text. | |||||
| | Extra words have been added at the end. | |||||
| de | from, of | |||||
| la | the, her | |||||
| que | who, that | |||||
| el | the | |||||
| en | in | |||||
| y | and | |||||
| a | to | |||||
| los | the, them | |||||
| del | de + el | |||||
| se | himself, from him etc | |||||
| las | the, them | |||||
| por | for, by, etc | |||||
| un | a | |||||
| para | for | |||||
| con | with | |||||
| no | no | |||||
| una | a | |||||
| su | his, her | |||||
| al | a + el | |||||
| | es from SER | |||||
| lo | him | |||||
| como | how | |||||
| más | more | |||||
| pero | pero | |||||
| sus | su plural | |||||
| le | to him, her | |||||
| ya | already | |||||
| o | or | |||||
| | fue from SER | |||||
| este | this | |||||
| | ha from HABER | |||||
| sí | himself etc | |||||
| porque | because | |||||
| esta | this | |||||
| | son from SER | |||||
| entre | between | |||||
| | está from ESTAR | |||||
| cuando | when | |||||
| muy | very | |||||
| sin | without | |||||
| sobre | on | |||||
| | ser from SER | |||||
| | tiene from TENER | |||||
| también | also | |||||
| me | me | |||||
| hasta | until | |||||
| hay | there is/are | |||||
| donde | where | |||||
| | han from HABER | |||||
| quien | whom, that | |||||
| | están from ESTAR | |||||
| | estado from ESTAR | |||||
| desde | from | |||||
| todo | all | |||||
| nos | us | |||||
| durante | during | |||||
| | estados from ESTAR | |||||
| todos | all | |||||
| uno | a | |||||
| les | to them | |||||
| ni | nor | |||||
| contra | against | |||||
| otros | other | |||||
| | fueron from SER | |||||
| ese | that | |||||
| eso | that | |||||
| | había from HABER | |||||
| ante | before | |||||
| ellos | they | |||||
| e | and (variant of y) | |||||
| esto | this | |||||
| mí | me | |||||
| antes | before | |||||
| algunos | some | |||||
| qué | what? | |||||
| unos | a | |||||
| yo | I | |||||
| otro | other | |||||
| otras | other | |||||
| otra | other | |||||
| él | he | |||||
| tanto | so much, many | |||||
| esa | that | |||||
| estos | these | |||||
| mucho | much, many | |||||
| quienes | who | |||||
| nada | nothing | |||||
| muchos | many | |||||
| cual | who | |||||
| | sea from SER | |||||
| poco | few | |||||
| ella | she | |||||
| estar | to be | |||||
| | haber from HABER | |||||
| estas | these | |||||
| | estaba from ESTAR | |||||
| | estamos from ESTAR | |||||
| algunas | some | |||||
| algo | something | |||||
| nosotros | we | |||||
| | other forms | |||||
| mi | me | |||||
| mis | mi plural | |||||
| tú | thou | |||||
| te | thee | |||||
| ti | thee | |||||
| tu | thy | |||||
| tus | tu plural | |||||
| ellas | they | |||||
| nosotras | we | |||||
| vosotros | you | |||||
| vosotras | you | |||||
| os | you | |||||
| mío | mine | |||||
| mía | | |||||
| míos | | |||||
| mías | | |||||
| tuyo | thine | |||||
| tuya | | |||||
| tuyos | | |||||
| tuyas | | |||||
| suyo | his, hers, theirs | |||||
| suya | | |||||
| suyos | | |||||
| suyas | | |||||
| nuestro | ours | |||||
| nuestra | | |||||
| nuestros | | |||||
| nuestras | | |||||
| vuestro | yours | |||||
| vuestra | | |||||
| vuestros | | |||||
| vuestras | | |||||
| esos | those | |||||
| esas | those | |||||
| | forms of estar, to be (not including the infinitive): | |||||
| estoy | |||||
| estás | |||||
| está | |||||
| estamos | |||||
| estáis | |||||
| están | |||||
| esté | |||||
| estés | |||||
| estemos | |||||
| estéis | |||||
| estén | |||||
| estaré | |||||
| estarás | |||||
| estará | |||||
| estaremos | |||||
| estaréis | |||||
| estarán | |||||
| estaría | |||||
| estarías | |||||
| estaríamos | |||||
| estaríais | |||||
| estarían | |||||
| estaba | |||||
| estabas | |||||
| estábamos | |||||
| estabais | |||||
| estaban | |||||
| estuve | |||||
| estuviste | |||||
| estuvo | |||||
| estuvimos | |||||
| estuvisteis | |||||
| estuvieron | |||||
| estuviera | |||||
| estuvieras | |||||
| estuviéramos | |||||
| estuvierais | |||||
| estuvieran | |||||
| estuviese | |||||
| estuvieses | |||||
| estuviésemos | |||||
| estuvieseis | |||||
| estuviesen | |||||
| estando | |||||
| estado | |||||
| estada | |||||
| estados | |||||
| estadas | |||||
| estad | |||||
| | forms of haber, to have (not including the infinitive): | |||||
| he | |||||
| has | |||||
| ha | |||||
| hemos | |||||
| habéis | |||||
| han | |||||
| haya | |||||
| hayas | |||||
| hayamos | |||||
| hayáis | |||||
| hayan | |||||
| habré | |||||
| habrás | |||||
| habrá | |||||
| habremos | |||||
| habréis | |||||
| habrán | |||||
| habría | |||||
| habrías | |||||
| habríamos | |||||
| habríais | |||||
| habrían | |||||
| había | |||||
| habías | |||||
| habíamos | |||||
| habíais | |||||
| habían | |||||
| hube | |||||
| hubiste | |||||
| hubo | |||||
| hubimos | |||||
| hubisteis | |||||
| hubieron | |||||
| hubiera | |||||
| hubieras | |||||
| hubiéramos | |||||
| hubierais | |||||
| hubieran | |||||
| hubiese | |||||
| hubieses | |||||
| hubiésemos | |||||
| hubieseis | |||||
| hubiesen | |||||
| habiendo | |||||
| habido | |||||
| habida | |||||
| habidos | |||||
| habidas | |||||
| | forms of ser, to be (not including the infinitive): | |||||
| soy | |||||
| eres | |||||
| es | |||||
| somos | |||||
| sois | |||||
| son | |||||
| sea | |||||
| seas | |||||
| seamos | |||||
| seáis | |||||
| sean | |||||
| seré | |||||
| serás | |||||
| será | |||||
| seremos | |||||
| seréis | |||||
| serán | |||||
| sería | |||||
| serías | |||||
| seríamos | |||||
| seríais | |||||
| serían | |||||
| era | |||||
| eras | |||||
| éramos | |||||
| erais | |||||
| eran | |||||
| fui | |||||
| fuiste | |||||
| fue | |||||
| fuimos | |||||
| fuisteis | |||||
| fueron | |||||
| fuera | |||||
| fueras | |||||
| fuéramos | |||||
| fuerais | |||||
| fueran | |||||
| fuese | |||||
| fueses | |||||
| fuésemos | |||||
| fueseis | |||||
| fuesen | |||||
| siendo | |||||
| sido | |||||
| | sed also means 'thirst' | |||||
| | forms of tener, to have (not including the infinitive): | |||||
| tengo | |||||
| tienes | |||||
| tiene | |||||
| tenemos | |||||
| tenéis | |||||
| tienen | |||||
| tenga | |||||
| tengas | |||||
| tengamos | |||||
| tengáis | |||||
| tengan | |||||
| tendré | |||||
| tendrás | |||||
| tendrá | |||||
| tendremos | |||||
| tendréis | |||||
| tendrán | |||||
| tendría | |||||
| tendrías | |||||
| tendríamos | |||||
| tendríais | |||||
| tendrían | |||||
| tenía | |||||
| tenías | |||||
| teníamos | |||||
| teníais | |||||
| tenían | |||||
| tuve | |||||
| tuviste | |||||
| tuvo | |||||
| tuvimos | |||||
| tuvisteis | |||||
| tuvieron | |||||
| tuviera | |||||
| tuvieras | |||||
| tuviéramos | |||||
| tuvierais | |||||
| tuvieran | |||||
| tuviese | |||||
| tuvieses | |||||
| tuviésemos | |||||
| tuvieseis | |||||
| tuviesen | |||||
| teniendo | |||||
| tenido | |||||
| tenida | |||||
| tenidos | |||||
| tenidas | |||||
| tened | |||||
| # example set of basque stopwords | |||||
| al | |||||
| anitz | |||||
| arabera | |||||
| asko | |||||
| baina | |||||
| bat | |||||
| batean | |||||
| batek | |||||
| bati | |||||
| batzuei | |||||
| batzuek | |||||
| batzuetan | |||||
| batzuk | |||||
| bera | |||||
| beraiek | |||||
| berau | |||||
| berauek | |||||
| bere | |||||
| berori | |||||
| beroriek | |||||
| beste | |||||
| bezala | |||||
| da | |||||
| dago | |||||
| dira | |||||
| ditu | |||||
| du | |||||
| dute | |||||
| edo | |||||
| egin | |||||
| ere | |||||
| eta | |||||
| eurak | |||||
| ez | |||||
| gainera | |||||
| gu | |||||
| gutxi | |||||
| guzti | |||||
| haiei | |||||
| haiek | |||||
| haietan | |||||
| hainbeste | |||||
| hala | |||||
| han | |||||
| handik | |||||
| hango | |||||
| hara | |||||
| hari | |||||
| hark | |||||
| hartan | |||||
| hau | |||||
| hauei | |||||
| hauek | |||||
| hauetan | |||||
| hemen | |||||
| hemendik | |||||
| hemengo | |||||
| hi | |||||
| hona | |||||
| honek | |||||
| honela | |||||
| honetan | |||||
| honi | |||||
| hor | |||||
| hori | |||||
| horiei | |||||
| horiek | |||||
| horietan | |||||
| horko | |||||
| horra | |||||
| horrek | |||||
| horrela | |||||
| horretan | |||||
| horri | |||||
| hortik | |||||
| hura | |||||
| izan | |||||
| ni | |||||
| noiz | |||||
| nola | |||||
| non | |||||
| nondik | |||||
| nongo | |||||
| nor | |||||
| nora | |||||
| ze | |||||
| zein | |||||
| zen | |||||
| zenbait | |||||
| zenbat | |||||
| zer | |||||
| zergatik | |||||
| ziren | |||||
| zituen | |||||
| zu | |||||
| zuek | |||||
| zuen | |||||
| zuten |
| # This file was created by Jacques Savoy and is distributed under the BSD license. | |||||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | |||||
| # Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| # Note: by default this file is used after normalization, so when adding entries | |||||
| # to this file, use the arabic 'ي' instead of 'ی' | |||||
| انان | |||||
| نداشته | |||||
| سراسر | |||||
| خياه | |||||
| ايشان | |||||
| وي | |||||
| تاكنون | |||||
| بيشتري | |||||
| دوم | |||||
| پس | |||||
| ناشي | |||||
| وگو | |||||
| يا | |||||
| داشتند | |||||
| سپس | |||||
| هنگام | |||||
| هرگز | |||||
| پنج | |||||
| نشان | |||||
| امسال | |||||
| ديگر | |||||
| گروهي | |||||
| شدند | |||||
| چطور | |||||
| ده | |||||
| و | |||||
| دو | |||||
| نخستين | |||||
| ولي | |||||
| چرا | |||||
| چه | |||||
| وسط | |||||
| ه | |||||
| كدام | |||||
| قابل | |||||
| يك | |||||
| رفت | |||||
| هفت | |||||
| همچنين | |||||
| در | |||||
| هزار | |||||
| بله | |||||
| بلي | |||||
| شايد | |||||
| اما | |||||
| شناسي | |||||
| گرفته | |||||
| دهد | |||||
| داشته | |||||
| دانست | |||||
| داشتن | |||||
| خواهيم | |||||
| ميليارد | |||||
| وقتيكه | |||||
| امد | |||||
| خواهد | |||||
| جز | |||||
| اورده | |||||
| شده | |||||
| بلكه | |||||
| خدمات | |||||
| شدن | |||||
| برخي | |||||
| نبود | |||||
| بسياري | |||||
| جلوگيري | |||||
| حق | |||||
| كردند | |||||
| نوعي | |||||
| بعري | |||||
| نكرده | |||||
| نظير | |||||
| نبايد | |||||
| بوده | |||||
| بودن | |||||
| داد | |||||
| اورد | |||||
| هست | |||||
| جايي | |||||
| شود | |||||
| دنبال | |||||
| داده | |||||
| بايد | |||||
| سابق | |||||
| هيچ | |||||
| همان | |||||
| انجا | |||||
| كمتر | |||||
| كجاست | |||||
| گردد | |||||
| كسي | |||||
| تر | |||||
| مردم | |||||
| تان | |||||
| دادن | |||||
| بودند | |||||
| سري | |||||
| جدا | |||||
| ندارند | |||||
| مگر | |||||
| يكديگر | |||||
| دارد | |||||
| دهند | |||||
| بنابراين | |||||
| هنگامي | |||||
| سمت | |||||
| جا | |||||
| انچه | |||||
| خود | |||||
| دادند | |||||
| زياد | |||||
| دارند | |||||
| اثر | |||||
| بدون | |||||
| بهترين | |||||
| بيشتر | |||||
| البته | |||||
| به | |||||
| براساس | |||||
| بيرون | |||||
| كرد | |||||
| بعضي | |||||
| گرفت | |||||
| توي | |||||
| اي | |||||
| ميليون | |||||
| او | |||||
| جريان | |||||
| تول | |||||
| بر | |||||
| مانند | |||||
| برابر | |||||
| باشيم | |||||
| مدتي | |||||
| گويند | |||||
| اكنون | |||||
| تا | |||||
| تنها | |||||
| جديد | |||||
| چند | |||||
| بي | |||||
| نشده | |||||
| كردن | |||||
| كردم | |||||
| گويد | |||||
| كرده | |||||
| كنيم | |||||
| نمي | |||||
| نزد | |||||
| روي | |||||
| قصد | |||||
| فقط | |||||
| بالاي | |||||
| ديگران | |||||
| اين | |||||
| ديروز | |||||
| توسط | |||||
| سوم | |||||
| ايم | |||||
| دانند | |||||
| سوي | |||||
| استفاده | |||||
| شما | |||||
| كنار | |||||
| داريم | |||||
| ساخته | |||||
| طور | |||||
| امده | |||||
| رفته | |||||
| نخست | |||||
| بيست | |||||
| نزديك | |||||
| طي | |||||
| كنيد | |||||
| از | |||||
| انها | |||||
| تمامي | |||||
| داشت | |||||
| يكي | |||||
| طريق | |||||
| اش | |||||
| چيست | |||||
| روب | |||||
| نمايد | |||||
| گفت | |||||
| چندين | |||||
| چيزي | |||||
| تواند | |||||
| ام | |||||
| ايا | |||||
| با | |||||
| ان | |||||
| ايد | |||||
| ترين | |||||
| اينكه | |||||
| ديگري | |||||
| راه | |||||
| هايي | |||||
| بروز | |||||
| همچنان | |||||
| پاعين | |||||
| كس | |||||
| حدود | |||||
| مختلف | |||||
| مقابل | |||||
| چيز | |||||
| گيرد | |||||
| ندارد | |||||
| ضد | |||||
| همچون | |||||
| سازي | |||||
| شان | |||||
| مورد | |||||
| باره | |||||
| مرسي | |||||
| خويش | |||||
| برخوردار | |||||
| چون | |||||
| خارج | |||||
| شش | |||||
| هنوز | |||||
| تحت | |||||
| ضمن | |||||
| هستيم | |||||
| گفته | |||||
| فكر | |||||
| بسيار | |||||
| پيش | |||||
| براي | |||||
| روزهاي | |||||
| انكه | |||||
| نخواهد | |||||
| بالا | |||||
| كل | |||||
| وقتي | |||||
| كي | |||||
| چنين | |||||
| كه | |||||
| گيري | |||||
| نيست | |||||
| است | |||||
| كجا | |||||
| كند | |||||
| نيز | |||||
| يابد | |||||
| بندي | |||||
| حتي | |||||
| توانند | |||||
| عقب | |||||
| خواست | |||||
| كنند | |||||
| بين | |||||
| تمام | |||||
| همه | |||||
| ما | |||||
| باشند | |||||
| مثل | |||||
| شد | |||||
| اري | |||||
| باشد | |||||
| اره | |||||
| طبق | |||||
| بعد | |||||
| اگر | |||||
| صورت | |||||
| غير | |||||
| جاي | |||||
| بيش | |||||
| ريزي | |||||
| اند | |||||
| زيرا | |||||
| چگونه | |||||
| بار | |||||
| لطفا | |||||
| مي | |||||
| درباره | |||||
| من | |||||
| ديده | |||||
| همين | |||||
| گذاري | |||||
| برداري | |||||
| علت | |||||
| گذاشته | |||||
| هم | |||||
| فوق | |||||
| نه | |||||
| ها | |||||
| شوند | |||||
| اباد | |||||
| همواره | |||||
| هر | |||||
| اول | |||||
| خواهند | |||||
| چهار | |||||
| نام | |||||
| امروز | |||||
| مان | |||||
| هاي | |||||
| قبل | |||||
| كنم | |||||
| سعي | |||||
| تازه | |||||
| را | |||||
| هستند | |||||
| زير | |||||
| جلوي | |||||
| عنوان | |||||
| بود |
| | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt | |||||
| | This file is distributed under the BSD License. | |||||
| | See http://snowball.tartarus.org/license.php | |||||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| | - Encoding was converted to UTF-8. | |||||
| | - This notice was added. | |||||
| | | |||||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| | forms of BE | |||||
| olla | |||||
| olen | |||||
| olet | |||||
| on | |||||
| olemme | |||||
| olette | |||||
| ovat | |||||
| ole | negative form | |||||
| oli | |||||
| olisi | |||||
| olisit | |||||
| olisin | |||||
| olisimme | |||||
| olisitte | |||||
| olisivat | |||||
| olit | |||||
| olin | |||||
| olimme | |||||
| olitte | |||||
| olivat | |||||
| ollut | |||||
| olleet | |||||
| en | negation | |||||
| et | |||||
| ei | |||||
| emme | |||||
| ette | |||||
| eivät | |||||
| |Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans | |||||
| minä minun minut minua minussa minusta minuun minulla minulta minulle | I | |||||
| sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you | |||||
| hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she | |||||
| me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we | |||||
| te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you | |||||
| he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they | |||||
| tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this | |||||
| tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that | |||||
| se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it | |||||
| nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these | |||||
| nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those | |||||
| ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they | |||||
| kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who | |||||
| ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) | |||||
| mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what | |||||
| mitkä | (pl) | |||||
| joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which | |||||
| jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) | |||||
| | conjunctions | |||||
| että | that | |||||
| ja | and | |||||
| jos | if | |||||
| koska | because | |||||
| kuin | than | |||||
| mutta | but | |||||
| niin | so | |||||
| sekä | and | |||||
| sillä | for | |||||
| tai | or | |||||
| vaan | but | |||||
| vai | or | |||||
| vaikka | although | |||||
| | prepositions | |||||
| kanssa | with | |||||
| mukaan | according to | |||||
| noin | about | |||||
| poikki | across | |||||
| yli | over, across | |||||
| | other | |||||
| kun | when | |||||
| niin | so | |||||
| nyt | now | |||||
| itse | self | |||||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt | |||||
| | This file is distributed under the BSD License. | |||||
| | See http://snowball.tartarus.org/license.php | |||||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| | - Encoding was converted to UTF-8. | |||||
| | - This notice was added. | |||||
| | | |||||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| | A French stop word list. Comments begin with vertical bar. Each stop | |||||
| | word is at the start of a line. | |||||
| au | a + le | |||||
| aux | a + les | |||||
| avec | with | |||||
| ce | this | |||||
| ces | these | |||||
| dans | with | |||||
| de | of | |||||
| des | de + les | |||||
| du | de + le | |||||
| elle | she | |||||
| en | `of them' etc | |||||
| et | and | |||||
| eux | them | |||||
| il | he | |||||
| je | I | |||||
| la | the | |||||
| le | the | |||||
| leur | their | |||||
| lui | him | |||||
| ma | my (fem) | |||||
| mais | but | |||||
| me | me | |||||
| même | same; as in moi-même (myself) etc | |||||
| mes | me (pl) | |||||
| moi | me | |||||
| mon | my (masc) | |||||
| ne | not | |||||
| nos | our (pl) | |||||
| notre | our | |||||
| nous | we | |||||
| on | one | |||||
| ou | where | |||||
| par | by | |||||
| pas | not | |||||
| pour | for | |||||
| qu | que before vowel | |||||
| que | that | |||||
| qui | who | |||||
| sa | his, her (fem) | |||||
| se | oneself | |||||
| ses | his (pl) | |||||
| son | his, her (masc) | |||||
| sur | on | |||||
| ta | thy (fem) | |||||
| te | thee | |||||
| tes | thy (pl) | |||||
| toi | thee | |||||
| ton | thy (masc) | |||||
| tu | thou | |||||
| un | a | |||||
| une | a | |||||
| vos | your (pl) | |||||
| votre | your | |||||
| vous | you | |||||
| | single letter forms | |||||
| c | c' | |||||
| d | d' | |||||
| j | j' | |||||
| l | l' | |||||
| à | to, at | |||||
| m | m' | |||||
| n | n' | |||||
| s | s' | |||||
| t | t' | |||||
| y | there | |||||
| | forms of être (not including the infinitive): | |||||
| été | |||||
| étée | |||||
| étées | |||||
| étés | |||||
| étant | |||||
| suis | |||||
| es | |||||
| est | |||||
| sommes | |||||
| êtes | |||||
| sont | |||||
| serai | |||||
| seras | |||||
| sera | |||||
| serons | |||||
| serez | |||||
| seront | |||||
| serais | |||||
| serait | |||||
| serions | |||||
| seriez | |||||
| seraient | |||||
| étais | |||||
| était | |||||
| étions | |||||
| étiez | |||||
| étaient | |||||
| fus | |||||
| fut | |||||
| fûmes | |||||
| fûtes | |||||
| furent | |||||
| sois | |||||
| soit | |||||
| soyons | |||||
| soyez | |||||
| soient | |||||
| fusse | |||||
| fusses | |||||
| fût | |||||
| fussions | |||||
| fussiez | |||||
| fussent | |||||
| | forms of avoir (not including the infinitive): | |||||
| ayant | |||||
| eu | |||||
| eue | |||||
| eues | |||||
| eus | |||||
| ai | |||||
| as | |||||
| avons | |||||
| avez | |||||
| ont | |||||
| aurai | |||||
| auras | |||||
| aura | |||||
| aurons | |||||
| aurez | |||||
| auront | |||||
| aurais | |||||
| aurait | |||||
| aurions | |||||
| auriez | |||||
| auraient | |||||
| avais | |||||
| avait | |||||
| avions | |||||
| aviez | |||||
| avaient | |||||
| eut | |||||
| eûmes | |||||
| eûtes | |||||
| eurent | |||||
| aie | |||||
| aies | |||||
| ait | |||||
| ayons | |||||
| ayez | |||||
| aient | |||||
| eusse | |||||
| eusses | |||||
| eût | |||||
| eussions | |||||
| eussiez | |||||
| eussent | |||||
| | Later additions (from Jean-Christophe Deschamps) | |||||
| ceci | this | |||||
| cela | that | |||||
| celà | that | |||||
| cet | this | |||||
| cette | this | |||||
| ici | here | |||||
| ils | they | |||||
| les | the (pl) | |||||
| leurs | their (pl) | |||||
| quel | which | |||||
| quels | which | |||||
| quelle | which | |||||
| quelles | which | |||||
| sans | without | |||||
| soi | oneself | |||||
| a | |||||
| ach | |||||
| ag | |||||
| agus | |||||
| an | |||||
| aon | |||||
| ar | |||||
| arna | |||||
| as | |||||
| b' | |||||
| ba | |||||
| beirt | |||||
| bhúr | |||||
| caoga | |||||
| ceathair | |||||
| ceathrar | |||||
| chomh | |||||
| chtó | |||||
| chuig | |||||
| chun | |||||
| cois | |||||
| céad | |||||
| cúig | |||||
| cúigear | |||||
| d' | |||||
| daichead | |||||
| dar | |||||
| de | |||||
| deich | |||||
| deichniúr | |||||
| den | |||||
| dhá | |||||
| do | |||||
| don | |||||
| dtí | |||||
| dá | |||||
| dár | |||||
| dó | |||||
| faoi | |||||
| faoin | |||||
| faoina | |||||
| faoinár | |||||
| fara | |||||
| fiche | |||||
| gach | |||||
| gan | |||||
| go | |||||
| gur | |||||
| haon | |||||
| hocht | |||||
| i | |||||
| iad | |||||
| idir | |||||
| in | |||||
| ina | |||||
| ins | |||||
| inár | |||||
| is | |||||
| le | |||||
| leis | |||||
| lena | |||||
| lenár | |||||
| m' | |||||
| mar | |||||
| mo | |||||
| mé | |||||
| na | |||||
| nach | |||||
| naoi | |||||
| naonúr | |||||
| ná | |||||
| ní | |||||
| níor | |||||
| nó | |||||
| nócha | |||||
| ocht | |||||
| ochtar | |||||
| os | |||||
| roimh | |||||
| sa | |||||
| seacht | |||||
| seachtar | |||||
| seachtó | |||||
| seasca | |||||
| seisear | |||||
| siad | |||||
| sibh | |||||
| sinn | |||||
| sna | |||||
| sé | |||||
| sí | |||||
| tar | |||||
| thar | |||||
| thú | |||||
| triúr | |||||
| trí | |||||
| trína | |||||
| trínár | |||||
| tríocha | |||||
| tú | |||||
| um | |||||
| ár | |||||
| é | |||||
| éis | |||||
| í | |||||
| ó | |||||
| ón | |||||
| óna | |||||
| ónár |
| # galican stopwords | |||||
| a | |||||
| aínda | |||||
| alí | |||||
| aquel | |||||
| aquela | |||||
| aquelas | |||||
| aqueles | |||||
| aquilo | |||||
| aquí | |||||
| ao | |||||
| aos | |||||
| as | |||||
| así | |||||
| á | |||||
| ben | |||||
| cando | |||||
| che | |||||
| co | |||||
| coa | |||||
| comigo | |||||
| con | |||||
| connosco | |||||
| contigo | |||||
| convosco | |||||
| coas | |||||
| cos | |||||
| cun | |||||
| cuns | |||||
| cunha | |||||
| cunhas | |||||
| da | |||||
| dalgunha | |||||
| dalgunhas | |||||
| dalgún | |||||
| dalgúns | |||||
| das | |||||
| de | |||||
| del | |||||
| dela | |||||
| delas | |||||
| deles | |||||
| desde | |||||
| deste | |||||
| do | |||||
| dos | |||||
| dun | |||||
| duns | |||||
| dunha | |||||
| dunhas | |||||
| e | |||||
| el | |||||
| ela | |||||
| elas | |||||
| eles | |||||
| en | |||||
| era | |||||
| eran | |||||
| esa | |||||
| esas | |||||
| ese | |||||
| eses | |||||
| esta | |||||
| estar | |||||
| estaba | |||||
| está | |||||
| están | |||||
| este | |||||
| estes | |||||
| estiven | |||||
| estou | |||||
| eu | |||||
| é | |||||
| facer | |||||
| foi | |||||
| foron | |||||
| fun | |||||
| había | |||||
| hai | |||||
| iso | |||||
| isto | |||||
| la | |||||
| las | |||||
| lle | |||||
| lles | |||||
| lo | |||||
| los | |||||
| mais | |||||
| me | |||||
| meu | |||||
| meus | |||||
| min | |||||
| miña | |||||
| miñas | |||||
| moi | |||||
| na | |||||
| nas | |||||
| neste | |||||
| nin | |||||
| no | |||||
| non | |||||
| nos | |||||
| nosa | |||||
| nosas | |||||
| noso | |||||
| nosos | |||||
| nós | |||||
| nun | |||||
| nunha | |||||
| nuns | |||||
| nunhas | |||||
| o | |||||
| os | |||||
| ou | |||||
| ó | |||||
| ós | |||||
| para | |||||
| pero | |||||
| pode | |||||
| pois | |||||
| pola | |||||
| polas | |||||
| polo | |||||
| polos | |||||
| por | |||||
| que | |||||
| se | |||||
| senón | |||||
| ser | |||||
| seu | |||||
| seus | |||||
| sexa | |||||
| sido | |||||
| sobre | |||||
| súa | |||||
| súas | |||||
| tamén | |||||
| tan | |||||
| te | |||||
| ten | |||||
| teñen | |||||
| teño | |||||
| ter | |||||
| teu | |||||
| teus | |||||
| ti | |||||
| tido | |||||
| tiña | |||||
| tiven | |||||
| túa | |||||
| túas | |||||
| un | |||||
| unha | |||||
| unhas | |||||
| uns | |||||
| vos | |||||
| vosa | |||||
| vosas | |||||
| voso | |||||
| vosos | |||||
| vós |
| # Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | |||||
| # This file was created by Jacques Savoy and is distributed under the BSD license. | |||||
| # Note: by default this file also contains forms normalized by HindiNormalizer | |||||
| # for spelling variation (see section below), such that it can be used whether or | |||||
| # not you enable that feature. When adding additional entries to this list, | |||||
| # please add the normalized form as well. | |||||
| अंदर | |||||
| अत | |||||
| अपना | |||||
| अपनी | |||||
| अपने | |||||
| अभी | |||||
| आदि | |||||
| आप | |||||
| इत्यादि | |||||
| इन | |||||
| इनका | |||||
| इन्हीं | |||||
| इन्हें | |||||
| इन्हों | |||||
| इस | |||||
| इसका | |||||
| इसकी | |||||
| इसके | |||||
| इसमें | |||||
| इसी | |||||
| इसे | |||||
| उन | |||||
| उनका | |||||
| उनकी | |||||
| उनके | |||||
| उनको | |||||
| उन्हीं | |||||
| उन्हें | |||||
| उन्हों | |||||
| उस | |||||
| उसके | |||||
| उसी | |||||
| उसे | |||||
| एक | |||||
| एवं | |||||
| एस | |||||
| ऐसे | |||||
| और | |||||
| कई | |||||
| कर | |||||
| करता | |||||
| करते | |||||
| करना | |||||
| करने | |||||
| करें | |||||
| कहते | |||||
| कहा | |||||
| का | |||||
| काफ़ी | |||||
| कि | |||||
| कितना | |||||
| किन्हें | |||||
| किन्हों | |||||
| किया | |||||
| किर | |||||
| किस | |||||
| किसी | |||||
| किसे | |||||
| की | |||||
| कुछ | |||||
| कुल | |||||
| के | |||||
| को | |||||
| कोई | |||||
| कौन | |||||
| कौनसा | |||||
| गया | |||||
| घर | |||||
| जब | |||||
| जहाँ | |||||
| जा | |||||
| जितना | |||||
| जिन | |||||
| जिन्हें | |||||
| जिन्हों | |||||
| जिस | |||||
| जिसे | |||||
| जीधर | |||||
| जैसा | |||||
| जैसे | |||||
| जो | |||||
| तक | |||||
| तब | |||||
| तरह | |||||
| तिन | |||||
| तिन्हें | |||||
| तिन्हों | |||||
| तिस | |||||
| तिसे | |||||
| तो | |||||
| था | |||||
| थी | |||||
| थे | |||||
| दबारा | |||||
| दिया | |||||
| दुसरा | |||||
| दूसरे | |||||
| दो | |||||
| द्वारा | |||||
| न | |||||
| नहीं | |||||
| ना | |||||
| निहायत | |||||
| नीचे | |||||
| ने | |||||
| पर | |||||
| पर | |||||
| पहले | |||||
| पूरा | |||||
| पे | |||||
| फिर | |||||
| बनी | |||||
| बही | |||||
| बहुत | |||||
| बाद | |||||
| बाला | |||||
| बिलकुल | |||||
| भी | |||||
| भीतर | |||||
| मगर | |||||
| मानो | |||||
| मे | |||||
| में | |||||
| यदि | |||||
| यह | |||||
| यहाँ | |||||
| यही | |||||
| या | |||||
| यिह | |||||
| ये | |||||
| रखें | |||||
| रहा | |||||
| रहे | |||||
| ऱ्वासा | |||||
| लिए | |||||
| लिये | |||||
| लेकिन | |||||
| व | |||||
| वर्ग | |||||
| वह | |||||
| वह | |||||
| वहाँ | |||||
| वहीं | |||||
| वाले | |||||
| वुह | |||||
| वे | |||||
| वग़ैरह | |||||
| संग | |||||
| सकता | |||||
| सकते | |||||
| सबसे | |||||
| सभी | |||||
| साथ | |||||
| साबुत | |||||
| साभ | |||||
| सारा | |||||
| से | |||||
| सो | |||||
| ही | |||||
| हुआ | |||||
| हुई | |||||
| हुए | |||||
| है | |||||
| हैं | |||||
| हो | |||||
| होता | |||||
| होती | |||||
| होते | |||||
| होना | |||||
| होने | |||||
| # additional normalized forms of the above | |||||
| अपनि | |||||
| जेसे | |||||
| होति | |||||
| सभि | |||||
| तिंहों | |||||
| इंहों | |||||
| दवारा | |||||
| इसि | |||||
| किंहें | |||||
| थि | |||||
| उंहों | |||||
| ओर | |||||
| जिंहें | |||||
| वहिं | |||||
| अभि | |||||
| बनि | |||||
| हि | |||||
| उंहिं | |||||
| उंहें | |||||
| हें | |||||
| वगेरह | |||||
| एसे | |||||
| रवासा | |||||
| कोन | |||||
| निचे | |||||
| काफि | |||||
| उसि | |||||
| पुरा | |||||
| भितर | |||||
| हे | |||||
| बहि | |||||
| वहां | |||||
| कोइ | |||||
| यहां | |||||
| जिंहों | |||||
| तिंहें | |||||
| किसि | |||||
| कइ | |||||
| यहि | |||||
| इंहिं | |||||
| जिधर | |||||
| इंहें | |||||
| अदि | |||||
| इतयादि | |||||
| हुइ | |||||
| कोनसा | |||||
| इसकि | |||||
| दुसरे | |||||
| जहां | |||||
| अप | |||||
| किंहों | |||||
| उनकि | |||||
| भि | |||||
| वरग | |||||
| हुअ | |||||
| जेसा | |||||
| नहिं |
| | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt | |||||
| | This file is distributed under the BSD License. | |||||
| | See http://snowball.tartarus.org/license.php | |||||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| | - Encoding was converted to UTF-8. | |||||
| | - This notice was added. | |||||
| | | |||||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| | Hungarian stop word list | |||||
| | prepared by Anna Tordai | |||||
| a | |||||
| ahogy | |||||
| ahol | |||||
| aki | |||||
| akik | |||||
| akkor | |||||
| alatt | |||||
| által | |||||
| általában | |||||
| amely | |||||
| amelyek | |||||
| amelyekben | |||||
| amelyeket | |||||
| amelyet | |||||
| amelynek | |||||
| ami | |||||
| amit | |||||
| amolyan | |||||
| amíg | |||||
| amikor | |||||
| át | |||||
| abban | |||||
| ahhoz | |||||
| annak | |||||
| arra | |||||
| arról | |||||
| az | |||||
| azok | |||||
| azon | |||||
| azt | |||||
| azzal | |||||
| azért | |||||
| aztán | |||||
| azután | |||||
| azonban | |||||
| bár | |||||
| be | |||||
| belül | |||||
| benne | |||||
| cikk | |||||
| cikkek | |||||
| cikkeket | |||||
| csak | |||||
| de | |||||
| e | |||||
| eddig | |||||
| egész | |||||
| egy | |||||
| egyes | |||||
| egyetlen | |||||
| egyéb | |||||
| egyik | |||||
| egyre | |||||
| ekkor | |||||
| el | |||||
| elég | |||||
| ellen | |||||
| elő | |||||
| először | |||||
| előtt | |||||
| első | |||||
| én | |||||
| éppen | |||||
| ebben | |||||
| ehhez | |||||
| emilyen | |||||
| ennek | |||||
| erre | |||||
| ez | |||||
| ezt | |||||
| ezek | |||||
| ezen | |||||
| ezzel | |||||
| ezért | |||||
| és | |||||
| fel | |||||
| felé | |||||
| hanem | |||||
| hiszen | |||||
| hogy | |||||
| hogyan | |||||
| igen | |||||
| így | |||||
| illetve | |||||
| ill. | |||||
| ill | |||||
| ilyen | |||||
| ilyenkor | |||||
| ison | |||||
| ismét | |||||
| itt | |||||
| jó | |||||
| jól | |||||
| jobban | |||||
| kell | |||||
| kellett | |||||
| keresztül | |||||
| keressünk | |||||
| ki | |||||
| kívül | |||||
| között | |||||
| közül | |||||
| legalább | |||||
| lehet | |||||
| lehetett | |||||
| legyen | |||||
| lenne | |||||
| lenni | |||||
| lesz | |||||
| lett | |||||
| maga | |||||
| magát | |||||
| majd | |||||
| majd | |||||
| már | |||||
| más | |||||
| másik | |||||
| meg | |||||
| még | |||||
| mellett | |||||
| mert | |||||
| mely | |||||
| melyek | |||||
| mi | |||||
| mit | |||||
| míg | |||||
| miért | |||||
| milyen | |||||
| mikor | |||||
| minden | |||||
| mindent | |||||
| mindenki | |||||
| mindig | |||||
| mint | |||||
| mintha | |||||
| mivel | |||||
| most | |||||
| nagy | |||||
| nagyobb | |||||
| nagyon | |||||
| ne | |||||
| néha | |||||
| nekem | |||||
| neki | |||||
| nem | |||||
| néhány | |||||
| nélkül | |||||
| nincs | |||||
| olyan | |||||
| ott | |||||
| össze | |||||
| ő | |||||
| ők | |||||
| őket | |||||
| pedig | |||||
| persze | |||||
| rá | |||||
| s | |||||
| saját | |||||
| sem | |||||
| semmi | |||||
| sok | |||||
| sokat | |||||
| sokkal | |||||
| számára | |||||
| szemben | |||||
| szerint | |||||
| szinte | |||||
| talán | |||||
| tehát | |||||
| teljes | |||||
| tovább | |||||
| továbbá | |||||
| több | |||||
| úgy | |||||
| ugyanis | |||||
| új | |||||
| újabb | |||||
| újra | |||||
| után | |||||
| utána | |||||
| utolsó | |||||
| vagy | |||||
| vagyis | |||||
| valaki | |||||
| valami | |||||
| valamint | |||||
| való | |||||
| vagyok | |||||
| van | |||||
| vannak | |||||
| volt | |||||
| voltam | |||||
| voltak | |||||
| voltunk | |||||
| vissza | |||||
| vele | |||||
| viszont | |||||
| volna |
| # example set of Armenian stopwords. | |||||
| այդ | |||||
| այլ | |||||
| այն | |||||
| այս | |||||
| դու | |||||
| դուք | |||||
| եմ | |||||
| են | |||||
| ենք | |||||
| ես | |||||
| եք | |||||
| է | |||||
| էի | |||||
| էին | |||||
| էինք | |||||
| էիր | |||||
| էիք | |||||
| էր | |||||
| ըստ | |||||
| թ | |||||
| ի | |||||
| ին | |||||
| իսկ | |||||
| իր | |||||
| կամ | |||||
| համար | |||||
| հետ | |||||
| հետո | |||||
| մենք | |||||
| մեջ | |||||
| մի | |||||
| ն | |||||
| նա | |||||
| նաև | |||||
| նրա | |||||
| նրանք | |||||
| որ | |||||
| որը | |||||
| որոնք | |||||
| որպես | |||||
| ու | |||||
| ում | |||||
| պիտի | |||||
| վրա | |||||
| և |
| # from appendix D of: A Study of Stemming Effects on Information | |||||
| # Retrieval in Bahasa Indonesia | |||||
| ada | |||||
| adanya | |||||
| adalah | |||||
| adapun | |||||
| agak | |||||
| agaknya | |||||
| agar | |||||
| akan | |||||
| akankah | |||||
| akhirnya | |||||
| aku | |||||
| akulah | |||||
| amat | |||||
| amatlah | |||||
| anda | |||||
| andalah | |||||
| antar | |||||
| diantaranya | |||||
| antara | |||||
| antaranya | |||||
| diantara | |||||
| apa | |||||
| apaan | |||||
| mengapa | |||||
| apabila | |||||
| apakah | |||||
| apalagi | |||||
| apatah | |||||
| atau | |||||
| ataukah | |||||
| ataupun | |||||
| bagai | |||||
| bagaikan | |||||
| sebagai | |||||
| sebagainya | |||||
| bagaimana | |||||
| bagaimanapun | |||||
| sebagaimana | |||||
| bagaimanakah | |||||
| bagi | |||||
| bahkan | |||||
| bahwa | |||||
| bahwasanya | |||||
| sebaliknya | |||||
| banyak | |||||
| sebanyak | |||||
| beberapa | |||||
| seberapa | |||||
| begini | |||||
| beginian | |||||
| beginikah | |||||
| beginilah | |||||
| sebegini | |||||
| begitu | |||||
| begitukah | |||||
| begitulah | |||||
| begitupun | |||||
| sebegitu | |||||
| belum | |||||
| belumlah | |||||
| sebelum | |||||
| sebelumnya | |||||
| sebenarnya | |||||
| berapa | |||||
| berapakah | |||||
| berapalah | |||||
| berapapun | |||||
| betulkah | |||||
| sebetulnya | |||||
| biasa | |||||
| biasanya | |||||
| bila | |||||
| bilakah | |||||
| bisa | |||||
| bisakah | |||||
| sebisanya | |||||
| boleh | |||||
| bolehkah | |||||
| bolehlah | |||||
| buat | |||||
| bukan | |||||
| bukankah | |||||
| bukanlah | |||||
| bukannya | |||||
| cuma | |||||
| percuma | |||||
| dahulu | |||||
| dalam | |||||
| dan | |||||
| dapat | |||||
| dari | |||||
| daripada | |||||
| dekat | |||||
| demi | |||||
| demikian | |||||
| demikianlah | |||||
| sedemikian | |||||
| dengan | |||||
| depan | |||||
| di | |||||
| dia | |||||
| dialah | |||||
| dini | |||||
| diri | |||||
| dirinya | |||||
| terdiri | |||||
| dong | |||||
| dulu | |||||
| enggak | |||||
| enggaknya | |||||
| entah | |||||
| entahlah | |||||
| terhadap | |||||
| terhadapnya | |||||
| hal | |||||
| hampir | |||||
| hanya | |||||
| hanyalah | |||||
| harus | |||||
| haruslah | |||||
| harusnya | |||||
| seharusnya | |||||
| hendak | |||||
| hendaklah | |||||
| hendaknya | |||||
| hingga | |||||
| sehingga | |||||
| ia | |||||
| ialah | |||||
| ibarat | |||||
| ingin | |||||
| inginkah | |||||
| inginkan | |||||
| ini | |||||
| inikah | |||||
| inilah | |||||
| itu | |||||
| itukah | |||||
| itulah | |||||
| jangan | |||||
| jangankan | |||||
| janganlah | |||||
| jika | |||||
| jikalau | |||||
| juga | |||||
| justru | |||||
| kala | |||||
| kalau | |||||
| kalaulah | |||||
| kalaupun | |||||
| kalian | |||||
| kami | |||||
| kamilah | |||||
| kamu | |||||
| kamulah | |||||
| kan | |||||
| kapan | |||||
| kapankah | |||||
| kapanpun | |||||
| dikarenakan | |||||
| karena | |||||
| karenanya | |||||
| ke | |||||
| kecil | |||||
| kemudian | |||||
| kenapa | |||||
| kepada | |||||
| kepadanya | |||||
| ketika | |||||
| seketika | |||||
| khususnya | |||||
| kini | |||||
| kinilah | |||||
| kiranya | |||||
| sekiranya | |||||
| kita | |||||
| kitalah | |||||
| kok | |||||
| lagi | |||||
| lagian | |||||
| selagi | |||||
| lah | |||||
| lain | |||||
| lainnya | |||||
| melainkan | |||||
| selaku | |||||
| lalu | |||||
| melalui | |||||
| terlalu | |||||
| lama | |||||
| lamanya | |||||
| selama | |||||
| selama | |||||
| selamanya | |||||
| lebih | |||||
| terlebih | |||||
| bermacam | |||||
| macam | |||||
| semacam | |||||
| maka | |||||
| makanya | |||||
| makin | |||||
| malah | |||||
| malahan | |||||
| mampu | |||||
| mampukah | |||||
| mana | |||||
| manakala | |||||
| manalagi | |||||
| masih | |||||
| masihkah | |||||
| semasih | |||||
| masing | |||||
| mau | |||||
| maupun | |||||
| semaunya | |||||
| memang | |||||
| mereka | |||||
| merekalah | |||||
| meski | |||||
| meskipun | |||||
| semula | |||||
| mungkin | |||||
| mungkinkah | |||||
| nah | |||||
| namun | |||||
| nanti | |||||
| nantinya | |||||
| nyaris | |||||
| oleh | |||||
| olehnya | |||||
| seorang | |||||
| seseorang | |||||
| pada | |||||
| padanya | |||||
| padahal | |||||
| paling | |||||
| sepanjang | |||||
| pantas | |||||
| sepantasnya | |||||
| sepantasnyalah | |||||
| para | |||||
| pasti | |||||
| pastilah | |||||
| per | |||||
| pernah | |||||
| pula | |||||
| pun | |||||
| merupakan | |||||
| rupanya | |||||
| serupa | |||||
| saat | |||||
| saatnya | |||||
| sesaat | |||||
| saja | |||||
| sajalah | |||||
| saling | |||||
| bersama | |||||
| sama | |||||
| sesama | |||||
| sambil | |||||
| sampai | |||||
| sana | |||||
| sangat | |||||
| sangatlah | |||||
| saya | |||||
| sayalah | |||||
| se | |||||
| sebab | |||||
| sebabnya | |||||
| sebuah | |||||
| tersebut | |||||
| tersebutlah | |||||
| sedang | |||||
| sedangkan | |||||
| sedikit | |||||
| sedikitnya | |||||
| segala | |||||
| segalanya | |||||
| segera | |||||
| sesegera | |||||
| sejak | |||||
| sejenak | |||||
| sekali | |||||
| sekalian | |||||
| sekalipun | |||||
| sesekali | |||||
| sekaligus | |||||
| sekarang | |||||
| sekarang | |||||
| sekitar | |||||
| sekitarnya | |||||
| sela | |||||
| selain | |||||
| selalu | |||||
| seluruh | |||||
| seluruhnya | |||||
| semakin | |||||
| sementara | |||||
| sempat | |||||
| semua | |||||
| semuanya | |||||
| sendiri | |||||
| sendirinya | |||||
| seolah | |||||
| seperti | |||||
| sepertinya | |||||
| sering | |||||
| seringnya | |||||
| serta | |||||
| siapa | |||||
| siapakah | |||||
| siapapun | |||||
| disini | |||||
| disinilah | |||||
| sini | |||||
| sinilah | |||||
| sesuatu | |||||
| sesuatunya | |||||
| suatu | |||||
| sesudah | |||||
| sesudahnya | |||||
| sudah | |||||
| sudahkah | |||||
| sudahlah | |||||
| supaya | |||||
| tadi | |||||
| tadinya | |||||
| tak | |||||
| tanpa | |||||
| setelah | |||||
| telah | |||||
| tentang | |||||
| tentu | |||||
| tentulah | |||||
| tentunya | |||||
| tertentu | |||||
| seterusnya | |||||
| tapi | |||||
| tetapi | |||||
| setiap | |||||
| tiap | |||||
| setidaknya | |||||
| tidak | |||||
| tidakkah | |||||
| tidaklah | |||||
| toh | |||||
| waduh | |||||
| wah | |||||
| wahai | |||||
| sewaktu | |||||
| walau | |||||
| walaupun | |||||
| wong | |||||
| yaitu | |||||
| yakni | |||||
| yang |
| | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt | |||||
| | This file is distributed under the BSD License. | |||||
| | See http://snowball.tartarus.org/license.php | |||||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| | - Encoding was converted to UTF-8. | |||||
| | - This notice was added. | |||||
| | | |||||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| | An Italian stop word list. Comments begin with vertical bar. Each stop | |||||
| | word is at the start of a line. | |||||
| ad | a (to) before vowel | |||||
| al | a + il | |||||
| allo | a + lo | |||||
| ai | a + i | |||||
| agli | a + gli | |||||
| all | a + l' | |||||
| agl | a + gl' | |||||
| alla | a + la | |||||
| alle | a + le | |||||
| con | with | |||||
| col | con + il | |||||
| coi | con + i (forms collo, cogli etc are now very rare) | |||||
| da | from | |||||
| dal | da + il | |||||
| dallo | da + lo | |||||
| dai | da + i | |||||
| dagli | da + gli | |||||
| dall | da + l' | |||||
| dagl | da + gll' | |||||
| dalla | da + la | |||||
| dalle | da + le | |||||
| di | of | |||||
| del | di + il | |||||
| dello | di + lo | |||||
| dei | di + i | |||||
| degli | di + gli | |||||
| dell | di + l' | |||||
| degl | di + gl' | |||||
| della | di + la | |||||
| delle | di + le | |||||
| in | in | |||||
| nel | in + el | |||||
| nello | in + lo | |||||
| nei | in + i | |||||
| negli | in + gli | |||||
| nell | in + l' | |||||
| negl | in + gl' | |||||
| nella | in + la | |||||
| nelle | in + le | |||||
| su | on | |||||
| sul | su + il | |||||
| sullo | su + lo | |||||
| sui | su + i | |||||
| sugli | su + gli | |||||
| sull | su + l' | |||||
| sugl | su + gl' | |||||
| sulla | su + la | |||||
| sulle | su + le | |||||
| per | through, by | |||||
| tra | among | |||||
| contro | against | |||||
| io | I | |||||
| tu | thou | |||||
| lui | he | |||||
| lei | she | |||||
| noi | we | |||||
| voi | you | |||||
| loro | they | |||||
| mio | my | |||||
| mia | | |||||
| miei | | |||||
| mie | | |||||
| tuo | | |||||
| tua | | |||||
| tuoi | thy | |||||
| tue | | |||||
| suo | | |||||
| sua | | |||||
| suoi | his, her | |||||
| sue | | |||||
| nostro | our | |||||
| nostra | | |||||
| nostri | | |||||
| nostre | | |||||
| vostro | your | |||||
| vostra | | |||||
| vostri | | |||||
| vostre | | |||||
| mi | me | |||||
| ti | thee | |||||
| ci | us, there | |||||
| vi | you, there | |||||
| lo | him, the | |||||
| la | her, the | |||||
| li | them | |||||
| le | them, the | |||||
| gli | to him, the | |||||
| ne | from there etc | |||||
| il | the | |||||
| un | a | |||||
| uno | a | |||||
| una | a | |||||
| ma | but | |||||
| ed | and | |||||
| se | if | |||||
| perché | why, because | |||||
| anche | also | |||||
| come | how | |||||
| dov | where (as dov') | |||||
| dove | where | |||||
| che | who, that | |||||
| chi | who | |||||
| cui | whom | |||||
| non | not | |||||
| più | more | |||||
| quale | who, that | |||||
| quanto | how much | |||||
| quanti | | |||||
| quanta | | |||||
| quante | | |||||
| quello | that | |||||
| quelli | | |||||
| quella | | |||||
| quelle | | |||||
| questo | this | |||||
| questi | | |||||
| questa | | |||||
| queste | | |||||
| si | yes | |||||
| tutto | all | |||||
| tutti | all | |||||
| | single letter forms: | |||||
| a | at | |||||
| c | as c' for ce or ci | |||||
| e | and | |||||
| i | the | |||||
| l | as l' | |||||
| o | or | |||||
| | forms of avere, to have (not including the infinitive): | |||||
| ho | |||||
| hai | |||||
| ha | |||||
| abbiamo | |||||
| avete | |||||
| hanno | |||||
| abbia | |||||
| abbiate | |||||
| abbiano | |||||
| avrò | |||||
| avrai | |||||
| avrà | |||||
| avremo | |||||
| avrete | |||||
| avranno | |||||
| avrei | |||||
| avresti | |||||
| avrebbe | |||||
| avremmo | |||||
| avreste | |||||
| avrebbero | |||||
| avevo | |||||
| avevi | |||||
| aveva | |||||
| avevamo | |||||
| avevate | |||||
| avevano | |||||
| ebbi | |||||
| avesti | |||||
| ebbe | |||||
| avemmo | |||||
| aveste | |||||
| ebbero | |||||
| avessi | |||||
| avesse | |||||
| avessimo | |||||
| avessero | |||||
| avendo | |||||
| avuto | |||||
| avuta | |||||
| avuti | |||||
| avute | |||||
| | forms of essere, to be (not including the infinitive): | |||||
| sono | |||||
| sei | |||||
| è | |||||
| siamo | |||||
| siete | |||||
| sia | |||||
| siate | |||||
| siano | |||||
| sarò | |||||
| sarai | |||||
| sarà | |||||
| saremo | |||||
| sarete | |||||
| saranno | |||||
| sarei | |||||
| saresti | |||||
| sarebbe | |||||
| saremmo | |||||
| sareste | |||||
| sarebbero | |||||
| ero | |||||
| eri | |||||
| era | |||||
| eravamo | |||||
| eravate | |||||
| erano | |||||
| fui | |||||
| fosti | |||||
| fu | |||||
| fummo | |||||
| foste | |||||
| furono | |||||
| fossi | |||||
| fosse | |||||
| fossimo | |||||
| fossero | |||||
| essendo | |||||
| | forms of fare, to do (not including the infinitive, fa, fat-): | |||||
| faccio | |||||
| fai | |||||
| facciamo | |||||
| fanno | |||||
| faccia | |||||
| facciate | |||||
| facciano | |||||
| farò | |||||
| farai | |||||
| farà | |||||
| faremo | |||||
| farete | |||||
| faranno | |||||
| farei | |||||
| faresti | |||||
| farebbe | |||||
| faremmo | |||||
| fareste | |||||
| farebbero | |||||
| facevo | |||||
| facevi | |||||
| faceva | |||||
| facevamo | |||||
| facevate | |||||
| facevano | |||||
| feci | |||||
| facesti | |||||
| fece | |||||
| facemmo | |||||
| faceste | |||||
| fecero | |||||
| facessi | |||||
| facesse | |||||
| facessimo | |||||
| facessero | |||||
| facendo | |||||
| | forms of stare, to be (not including the infinitive): | |||||
| sto | |||||
| stai | |||||
| sta | |||||
| stiamo | |||||
| stanno | |||||
| stia | |||||
| stiate | |||||
| stiano | |||||
| starò | |||||
| starai | |||||
| starà | |||||
| staremo | |||||
| starete | |||||
| staranno | |||||
| starei | |||||
| staresti | |||||
| starebbe | |||||
| staremmo | |||||
| stareste | |||||
| starebbero | |||||
| stavo | |||||
| stavi | |||||
| stava | |||||
| stavamo | |||||
| stavate | |||||
| stavano | |||||
| stetti | |||||
| stesti | |||||
| stette | |||||
| stemmo | |||||
| steste | |||||
| stettero | |||||
| stessi | |||||
| stesse | |||||
| stessimo | |||||
| stessero | |||||
| stando |
| # | |||||
| # This file defines a stopword set for Japanese. | |||||
| # | |||||
| # This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. | |||||
| # Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 | |||||
| # for frequency lists, etc. that can be useful for making your own set (if desired) | |||||
| # | |||||
| # Note that there is an overlap between these stopwords and the terms stopped when used | |||||
| # in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note | |||||
| # that comments are not allowed on the same line as stopwords. | |||||
| # | |||||
| # Also note that stopping is done in a case-insensitive manner. Change your StopFilter | |||||
| # configuration if you need case-sensitive stopping. Lastly, note that stopping is done | |||||
| # using the same character width as the entries in this file. Since this StopFilter is | |||||
| # normally done after a CJKWidthFilter in your chain, you would usually want your romaji | |||||
| # entries to be in half-width and your kana entries to be in full-width. | |||||
| # | |||||
| の | |||||
| に | |||||
| は | |||||
| を | |||||
| た | |||||
| が | |||||
| で | |||||
| て | |||||
| と | |||||
| し | |||||
| れ | |||||
| さ | |||||
| ある | |||||
| いる | |||||
| も | |||||
| する | |||||
| から | |||||
| な | |||||
| こと | |||||
| として | |||||
| い | |||||
| や | |||||
| れる | |||||
| など | |||||
| なっ | |||||
| ない | |||||
| この | |||||
| ため | |||||
| その | |||||
| あっ | |||||
| よう | |||||
| また | |||||
| もの | |||||
| という | |||||
| あり | |||||
| まで | |||||
| られ | |||||
| なる | |||||
| へ | |||||
| か | |||||
| だ | |||||
| これ | |||||
| によって | |||||
| により | |||||
| おり | |||||
| より | |||||
| による | |||||
| ず | |||||
| なり | |||||
| られる | |||||
| において | |||||
| ば | |||||
| なかっ | |||||
| なく | |||||
| しかし | |||||
| について | |||||
| せ | |||||
| だっ | |||||
| その後 | |||||
| できる | |||||
| それ | |||||
| う | |||||
| ので | |||||
| なお | |||||
| のみ | |||||
| でき | |||||
| き | |||||
| つ | |||||
| における | |||||
| および | |||||
| いう | |||||
| さらに | |||||
| でも | |||||
| ら | |||||
| たり | |||||
| その他 | |||||
| に関する | |||||
| たち | |||||
| ます | |||||
| ん | |||||
| なら | |||||
| に対して | |||||
| 特に | |||||
| せる | |||||
| 及び | |||||
| これら | |||||
| とき | |||||
| では | |||||
| にて | |||||
| ほか | |||||
| ながら | |||||
| うち | |||||
| そして | |||||
| とともに | |||||
| ただし | |||||
| かつて | |||||
| それぞれ | |||||
| または | |||||
| お | |||||
| ほど | |||||
| ものの | |||||
| に対する | |||||
| ほとんど | |||||
| と共に | |||||
| といった | |||||
| です | |||||
| とも | |||||
| ところ | |||||
| ここ | |||||
| ##### End of file |
| # Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins | |||||
| # the original list of over 800 forms was refined: | |||||
| # pronouns, adverbs, interjections were removed | |||||
| # | |||||
| # prepositions | |||||
| aiz | |||||
| ap | |||||
| ar | |||||
| apakš | |||||
| ārpus | |||||
| augšpus | |||||
| bez | |||||
| caur | |||||
| dēļ | |||||
| gar | |||||
| iekš | |||||
| iz | |||||
| kopš | |||||
| labad | |||||
| lejpus | |||||
| līdz | |||||
| no | |||||
| otrpus | |||||
| pa | |||||
| par | |||||
| pār | |||||
| pēc | |||||
| pie | |||||
| pirms | |||||
| pret | |||||
| priekš | |||||
| starp | |||||
| šaipus | |||||
| uz | |||||
| viņpus | |||||
| virs | |||||
| virspus | |||||
| zem | |||||
| apakšpus | |||||
| # Conjunctions | |||||
| un | |||||
| bet | |||||
| jo | |||||
| ja | |||||
| ka | |||||
| lai | |||||
| tomēr | |||||
| tikko | |||||
| turpretī | |||||
| arī | |||||
| kaut | |||||
| gan | |||||
| tādēļ | |||||
| tā | |||||
| ne | |||||
| tikvien | |||||
| vien | |||||
| kā | |||||
| ir | |||||
| te | |||||
| vai | |||||
| kamēr | |||||
| # Particles | |||||
| ar | |||||
| diezin | |||||
| droši | |||||
| diemžēl | |||||
| nebūt | |||||
| ik | |||||
| it | |||||
| taču | |||||
| nu | |||||
| pat | |||||
| tiklab | |||||
| iekšpus | |||||
| nedz | |||||
| tik | |||||
| nevis | |||||
| turpretim | |||||
| jeb | |||||
| iekam | |||||
| iekām | |||||
| iekāms | |||||
| kolīdz | |||||
| līdzko | |||||
| tiklīdz | |||||
| jebšu | |||||
| tālab | |||||
| tāpēc | |||||
| nekā | |||||
| itin | |||||
| jā | |||||
| jau | |||||
| jel | |||||
| nē | |||||
| nezin | |||||
| tad | |||||
| tikai | |||||
| vis | |||||
| tak | |||||
| iekams | |||||
| vien | |||||
| # modal verbs | |||||
| būt | |||||
| biju | |||||
| biji | |||||
| bija | |||||
| bijām | |||||
| bijāt | |||||
| esmu | |||||
| esi | |||||
| esam | |||||
| esat | |||||
| būšu | |||||
| būsi | |||||
| būs | |||||
| būsim | |||||
| būsiet | |||||
| tikt | |||||
| tiku | |||||
| tiki | |||||
| tika | |||||
| tikām | |||||
| tikāt | |||||
| tieku | |||||
| tiec | |||||
| tiek | |||||
| tiekam | |||||
| tiekat | |||||
| tikšu | |||||
| tiks | |||||
| tiksim | |||||
| tiksiet | |||||
| tapt | |||||
| tapi | |||||
| tapāt | |||||
| topat | |||||
| tapšu | |||||
| tapsi | |||||
| taps | |||||
| tapsim | |||||
| tapsiet | |||||
| kļūt | |||||
| kļuvu | |||||
| kļuvi | |||||
| kļuva | |||||
| kļuvām | |||||
| kļuvāt | |||||
| kļūstu | |||||
| kļūsti | |||||
| kļūst | |||||
| kļūstam | |||||
| kļūstat | |||||
| kļūšu | |||||
| kļūsi | |||||
| kļūs | |||||
| kļūsim | |||||
| kļūsiet | |||||
| # verbs | |||||
| varēt | |||||
| varēju | |||||
| varējām | |||||
| varēšu | |||||
| varēsim | |||||
| var | |||||
| varēji | |||||
| varējāt | |||||
| varēsi | |||||
| varēsiet | |||||
| varat | |||||
| varēja | |||||
| varēs |
| | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt | |||||
| | This file is distributed under the BSD License. | |||||
| | See http://snowball.tartarus.org/license.php | |||||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| | - Encoding was converted to UTF-8. | |||||
| | - This notice was added. | |||||
| | | |||||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| | A Dutch stop word list. Comments begin with vertical bar. Each stop | |||||
| | word is at the start of a line. | |||||
| | This is a ranked list (commonest to rarest) of stopwords derived from | |||||
| | a large sample of Dutch text. | |||||
| | Dutch stop words frequently exhibit homonym clashes. These are indicated | |||||
| | clearly below. | |||||
| de | the | |||||
| en | and | |||||
| van | of, from | |||||
| ik | I, the ego | |||||
| te | (1) chez, at etc, (2) to, (3) too | |||||
| dat | that, which | |||||
| die | that, those, who, which | |||||
| in | in, inside | |||||
| een | a, an, one | |||||
| hij | he | |||||
| het | the, it | |||||
| niet | not, nothing, naught | |||||
| zijn | (1) to be, being, (2) his, one's, its | |||||
| is | is | |||||
| was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river | |||||
| op | on, upon, at, in, up, used up | |||||
| aan | on, upon, to (as dative) | |||||
| met | with, by | |||||
| als | like, such as, when | |||||
| voor | (1) before, in front of, (2) furrow | |||||
| had | had, past tense all persons sing. of 'hebben' (have) | |||||
| er | there | |||||
| maar | but, only | |||||
| om | round, about, for etc | |||||
| hem | him | |||||
| dan | then | |||||
| zou | should/would, past tense all persons sing. of 'zullen' | |||||
| of | or, whether, if | |||||
| wat | what, something, anything | |||||
| mijn | possessive and noun 'mine' | |||||
| men | people, 'one' | |||||
| dit | this | |||||
| zo | so, thus, in this way | |||||
| door | through, by | |||||
| over | over, across | |||||
| ze | she, her, they, them | |||||
| zich | oneself | |||||
| bij | (1) a bee, (2) by, near, at | |||||
| ook | also, too | |||||
| tot | till, until | |||||
| je | you | |||||
| mij | me | |||||
| uit | out of, from | |||||
| der | Old Dutch form of 'van der' still found in surnames | |||||
| daar | (1) there, (2) because | |||||
| haar | (1) her, their, them, (2) hair | |||||
| naar | (1) unpleasant, unwell etc, (2) towards, (3) as | |||||
| heb | present first person sing. of 'to have' | |||||
| hoe | how, why | |||||
| heeft | present third person sing. of 'to have' | |||||
| hebben | 'to have' and various parts thereof | |||||
| deze | this | |||||
| u | you | |||||
| want | (1) for, (2) mitten, (3) rigging | |||||
| nog | yet, still | |||||
| zal | 'shall', first and third person sing. of verb 'zullen' (will) | |||||
| me | me | |||||
| zij | she, they | |||||
| nu | now | |||||
| ge | 'thou', still used in Belgium and south Netherlands | |||||
| geen | none | |||||
| omdat | because | |||||
| iets | something, somewhat | |||||
| worden | to become, grow, get | |||||
| toch | yet, still | |||||
| al | all, every, each | |||||
| waren | (1) 'were', (2) to wander, (3) wares | |||||
| veel | much, many | |||||
| meer | (1) more, (2) lake | |||||
| doen | to do, to make | |||||
| toen | then, when | |||||
| moet | noun 'spot/mote' and present form of 'to must' | |||||
| ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' | |||||
| zonder | without | |||||
| kan | noun 'can' and present form of 'to be able' | |||||
| hun | their, them | |||||
| dus | so, consequently | |||||
| alles | all, everything, anything | |||||
| onder | under, beneath | |||||
| ja | yes, of course | |||||
| eens | once, one day | |||||
| hier | here | |||||
| wie | who | |||||
| werd | imperfect third person sing. of 'become' | |||||
| altijd | always | |||||
| doch | yet, but etc | |||||
| wordt | present third person sing. of 'become' | |||||
| wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans | |||||
| kunnen | to be able | |||||
| ons | us/our | |||||
| zelf | self | |||||
| tegen | against, towards, at | |||||
| na | after, near | |||||
| reeds | already | |||||
| wil | (1) present tense of 'want', (2) 'will', noun, (3) fender | |||||
| kon | could; past tense of 'to be able' | |||||
| niets | nothing | |||||
| uw | your | |||||
| iemand | somebody | |||||
| geweest | been; past participle of 'be' | |||||
| andere | other |
| | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt | |||||
| | This file is distributed under the BSD License. | |||||
| | See http://snowball.tartarus.org/license.php | |||||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| | - Encoding was converted to UTF-8. | |||||
| | - This notice was added. | |||||
| | | |||||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| | A Norwegian stop word list. Comments begin with vertical bar. Each stop | |||||
| | word is at the start of a line. | |||||
| | This stop word list is for the dominant bokmål dialect. Words unique | |||||
| | to nynorsk are marked *. | |||||
| | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005 | |||||
| og | and | |||||
| i | in | |||||
| jeg | I | |||||
| det | it/this/that | |||||
| at | to (w. inf.) | |||||
| en | a/an | |||||
| et | a/an | |||||
| den | it/this/that | |||||
| til | to | |||||
| er | is/am/are | |||||
| som | who/that | |||||
| på | on | |||||
| de | they / you(formal) | |||||
| med | with | |||||
| han | he | |||||
| av | of | |||||
| ikke | not | |||||
| ikkje | not * | |||||
| der | there | |||||
| så | so | |||||
| var | was/were | |||||
| meg | me | |||||
| seg | oneself (reflexive) | |||||
| men | but | |||||
| ett | one | |||||
| har | have | |||||
| om | about | |||||
| vi | we | |||||
| min | my | |||||
| mitt | my | |||||
| ha | have | |||||
| hadde | had | |||||
| hun | she | |||||
| nå | now | |||||
| over | over | |||||
| da | when/as | |||||
| ved | by/know | |||||
| fra | from | |||||
| du | you | |||||
| ut | out | |||||
| sin | your | |||||
| dem | them | |||||
| oss | us | |||||
| opp | up | |||||
| man | you/one | |||||
| kan | can | |||||
| hans | his | |||||
| hvor | where | |||||
| eller | or | |||||
| hva | what | |||||
| skal | shall/must | |||||
| selv | self (reflexive) | |||||
| sjøl | self (reflexive) | |||||
| her | here | |||||
| alle | all | |||||
| vil | will | |||||
| bli | become | |||||
| ble | became | |||||
| blei | became * | |||||
| blitt | have become | |||||
| kunne | could | |||||
| inn | in | |||||
| når | when | |||||
| være | be | |||||
| kom | come | |||||
| noen | some | |||||
| noe | some | |||||
| ville | would | |||||
| dere | you | |||||
| som | who/which/that | |||||
| deres | their/theirs | |||||
| kun | only/just | |||||
| ja | yes | |||||
| etter | after | |||||
| ned | down | |||||
| skulle | should | |||||
| denne | this | |||||
| for | for/because | |||||
| deg | you | |||||
| si | hers/his | |||||
| sine | hers/his | |||||
| sitt | hers/his | |||||
| mot | against | |||||
| å | to | |||||
| meget | much | |||||
| hvorfor | why | |||||
| dette | this | |||||
| disse | these/those | |||||
| uten | without | |||||
| hvordan | how | |||||
| ingen | none | |||||
| din | your | |||||
| ditt | your | |||||
| blir | become | |||||
| samme | same | |||||
| hvilken | which | |||||
| hvilke | which (plural) | |||||
| sånn | such a | |||||
| inni | inside/within | |||||
| mellom | between | |||||
| vår | our | |||||
| hver | each | |||||
| hvem | who | |||||
| vors | us/ours | |||||
| hvis | whose | |||||
| både | both | |||||
| bare | only/just | |||||
| enn | than | |||||
| fordi | as/because | |||||
| før | before | |||||
| mange | many | |||||
| også | also | |||||
| slik | just | |||||
| vært | been | |||||
| være | to be | |||||
| båe | both * | |||||
| begge | both | |||||
| siden | since | |||||
| dykk | your * | |||||
| dykkar | yours * | |||||
| dei | they * | |||||
| deira | them * | |||||
| deires | theirs * | |||||
| deim | them * | |||||
| di | your (fem.) * | |||||
| då | as/when * | |||||
| eg | I * | |||||
| ein | a/an * | |||||
| eit | a/an * | |||||
| eitt | a/an * | |||||
| elles | or * | |||||
| honom | he * | |||||
| hjå | at * | |||||
| ho | she * | |||||
| hoe | she * | |||||
| henne | her | |||||
| hennar | her/hers | |||||
| hennes | hers | |||||
| hoss | how * | |||||
| hossen | how * | |||||
| ikkje | not * | |||||
| ingi | noone * | |||||
| inkje | noone * | |||||
| korleis | how * | |||||
| korso | how * | |||||
| kva | what/which * | |||||
| kvar | where * | |||||
| kvarhelst | where * | |||||
| kven | who/whom * | |||||
| kvi | why * | |||||
| kvifor | why * | |||||
| me | we * | |||||
| medan | while * | |||||
| mi | my * | |||||
| mine | my * | |||||
| mykje | much * | |||||
| no | now * | |||||
| nokon | some (masc./neut.) * | |||||
| noka | some (fem.) * | |||||
| nokor | some * | |||||
| noko | some * | |||||
| nokre | some * | |||||
| si | his/hers * | |||||
| sia | since * | |||||
| sidan | since * | |||||
| so | so * | |||||
| somt | some * | |||||
| somme | some * | |||||
| um | about * | |||||
| upp | up * | |||||
| vere | be * | |||||
| vore | was * | |||||
| verte | become * | |||||
| vort | become * | |||||
| varte | became * | |||||
| vart | became * | |||||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt | |||||
| | This file is distributed under the BSD License. | |||||
| | See http://snowball.tartarus.org/license.php | |||||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| | - Encoding was converted to UTF-8. | |||||
| | - This notice was added. | |||||
| | | |||||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| | A Portuguese stop word list. Comments begin with vertical bar. Each stop | |||||
| | word is at the start of a line. | |||||
| | The following is a ranked list (commonest to rarest) of stopwords | |||||
| | deriving from a large sample of text. | |||||
| | Extra words have been added at the end. | |||||
| de | of, from | |||||
| a | the; to, at; her | |||||
| o | the; him | |||||
| que | who, that | |||||
| e | and | |||||
| do | de + o | |||||
| da | de + a | |||||
| em | in | |||||
| um | a | |||||
| para | for | |||||
| | é from SER | |||||
| com | with | |||||
| não | not, no | |||||
| uma | a | |||||
| os | the; them | |||||
| no | em + o | |||||
| se | himself etc | |||||
| na | em + a | |||||
| por | for | |||||
| mais | more | |||||
| as | the; them | |||||
| dos | de + os | |||||
| como | as, like | |||||
| mas | but | |||||
| | foi from SER | |||||
| ao | a + o | |||||
| ele | he | |||||
| das | de + as | |||||
| | tem from TER | |||||
| à | a + a | |||||
| seu | his | |||||
| sua | her | |||||
| ou | or | |||||
| | ser from SER | |||||
| quando | when | |||||
| muito | much | |||||
| | há from HAV | |||||
| nos | em + os; us | |||||
| já | already, now | |||||
| | está from EST | |||||
| eu | I | |||||
| também | also | |||||
| só | only, just | |||||
| pelo | per + o | |||||
| pela | per + a | |||||
| até | up to | |||||
| isso | that | |||||
| ela | she | |||||
| entre | between | |||||
| | era from SER | |||||
| depois | after | |||||
| sem | without | |||||
| mesmo | same | |||||
| aos | a + os | |||||
| | ter from TER | |||||
| seus | his | |||||
| quem | whom | |||||
| nas | em + as | |||||
| me | me | |||||
| esse | that | |||||
| eles | they | |||||
| | estão from EST | |||||
| você | you | |||||
| | tinha from TER | |||||
| | foram from SER | |||||
| essa | that | |||||
| num | em + um | |||||
| nem | nor | |||||
| suas | her | |||||
| meu | my | |||||
| às | a + as | |||||
| minha | my | |||||
| | têm from TER | |||||
| numa | em + uma | |||||
| pelos | per + os | |||||
| elas | they | |||||
| | havia from HAV | |||||
| | seja from SER | |||||
| qual | which | |||||
| | será from SER | |||||
| nós | we | |||||
| | tenho from TER | |||||
| lhe | to him, her | |||||
| deles | of them | |||||
| essas | those | |||||
| esses | those | |||||
| pelas | per + as | |||||
| este | this | |||||
| | fosse from SER | |||||
| dele | of him | |||||
| | other words. There are many contractions such as naquele = em+aquele, | |||||
| | mo = me+o, but they are rare. | |||||
| | Indefinite article plural forms are also rare. | |||||
| tu | thou | |||||
| te | thee | |||||
| vocês | you (plural) | |||||
| vos | you | |||||
| lhes | to them | |||||
| meus | my | |||||
| minhas | |||||
| teu | thy | |||||
| tua | |||||
| teus | |||||
| tuas | |||||
| nosso | our | |||||
| nossa | |||||
| nossos | |||||
| nossas | |||||
| dela | of her | |||||
| delas | of them | |||||
| esta | this | |||||
| estes | these | |||||
| estas | these | |||||
| aquele | that | |||||
| aquela | that | |||||
| aqueles | those | |||||
| aquelas | those | |||||
| isto | this | |||||
| aquilo | that | |||||
| | forms of estar, to be (not including the infinitive): | |||||
| estou | |||||
| está | |||||
| estamos | |||||
| estão | |||||
| estive | |||||
| esteve | |||||
| estivemos | |||||
| estiveram | |||||
| estava | |||||
| estávamos | |||||
| estavam | |||||
| estivera | |||||
| estivéramos | |||||
| esteja | |||||
| estejamos | |||||
| estejam | |||||
| estivesse | |||||
| estivéssemos | |||||
| estivessem | |||||
| estiver | |||||
| estivermos | |||||
| estiverem | |||||
| | forms of haver, to have (not including the infinitive): | |||||
| hei | |||||
| há | |||||
| havemos | |||||
| hão | |||||
| houve | |||||
| houvemos | |||||
| houveram | |||||
| houvera | |||||
| houvéramos | |||||
| haja | |||||
| hajamos | |||||
| hajam | |||||
| houvesse | |||||
| houvéssemos | |||||
| houvessem | |||||
| houver | |||||
| houvermos | |||||
| houverem | |||||
| houverei | |||||
| houverá | |||||
| houveremos | |||||
| houverão | |||||
| houveria | |||||
| houveríamos | |||||
| houveriam | |||||
| | forms of ser, to be (not including the infinitive): | |||||
| sou | |||||
| somos | |||||
| são | |||||
| era | |||||
| éramos | |||||
| eram | |||||
| fui | |||||
| foi | |||||
| fomos | |||||
| foram | |||||
| fora | |||||
| fôramos | |||||
| seja | |||||
| sejamos | |||||
| sejam | |||||
| fosse | |||||
| fôssemos | |||||
| fossem | |||||
| for | |||||
| formos | |||||
| forem | |||||
| serei | |||||
| será | |||||
| seremos | |||||
| serão | |||||
| seria | |||||
| seríamos | |||||
| seriam | |||||
| | forms of ter, to have (not including the infinitive): | |||||
| tenho | |||||
| tem | |||||
| temos | |||||
| tém | |||||
| tinha | |||||
| tínhamos | |||||
| tinham | |||||
| tive | |||||
| teve | |||||
| tivemos | |||||
| tiveram | |||||
| tivera | |||||
| tivéramos | |||||
| tenha | |||||
| tenhamos | |||||
| tenham | |||||
| tivesse | |||||
| tivéssemos | |||||
| tivessem | |||||
| tiver | |||||
| tivermos | |||||
| tiverem | |||||
| terei | |||||
| terá | |||||
| teremos | |||||
| terão | |||||
| teria | |||||
| teríamos | |||||
| teriam |
| # This file was created by Jacques Savoy and is distributed under the BSD license. | |||||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | |||||
| # Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| acea | |||||
| aceasta | |||||
| această | |||||
| aceea | |||||
| acei | |||||
| aceia | |||||
| acel | |||||
| acela | |||||
| acele | |||||
| acelea | |||||
| acest | |||||
| acesta | |||||
| aceste | |||||
| acestea | |||||
| aceşti | |||||
| aceştia | |||||
| acolo | |||||
| acum | |||||
| ai | |||||
| aia | |||||
| aibă | |||||
| aici | |||||
| al | |||||
| ăla | |||||
| ale | |||||
| alea | |||||
| ălea | |||||
| altceva | |||||
| altcineva | |||||
| am | |||||
| ar | |||||
| are | |||||
| aş | |||||
| aşadar | |||||
| asemenea | |||||
| asta | |||||
| ăsta | |||||
| astăzi | |||||
| astea | |||||
| ăstea | |||||
| ăştia | |||||
| asupra | |||||
| aţi | |||||
| au | |||||
| avea | |||||
| avem | |||||
| aveţi | |||||
| azi | |||||
| bine | |||||
| bucur | |||||
| bună | |||||
| ca | |||||
| că | |||||
| căci | |||||
| când | |||||
| care | |||||
| cărei | |||||
| căror | |||||
| cărui | |||||
| cât | |||||
| câte | |||||
| câţi | |||||
| către | |||||
| câtva | |||||
| ce | |||||
| cel | |||||
| ceva | |||||
| chiar | |||||
| cînd | |||||
| cine | |||||
| cineva | |||||
| cît | |||||
| cîte | |||||
| cîţi | |||||
| cîtva | |||||
| contra | |||||
| cu | |||||
| cum | |||||
| cumva | |||||
| curând | |||||
| curînd | |||||
| da | |||||
| dă | |||||
| dacă | |||||
| dar | |||||
| datorită | |||||
| de | |||||
| deci | |||||
| deja | |||||
| deoarece | |||||
| departe | |||||
| deşi | |||||
| din | |||||
| dinaintea | |||||
| dintr | |||||
| dintre | |||||
| drept | |||||
| după | |||||
| ea | |||||
| ei | |||||
| el | |||||
| ele | |||||
| eram | |||||
| este | |||||
| eşti | |||||
| eu | |||||
| face | |||||
| fără | |||||
| fi | |||||
| fie | |||||
| fiecare | |||||
| fii | |||||
| fim | |||||
| fiţi | |||||
| iar | |||||
| ieri | |||||
| îi | |||||
| îl | |||||
| îmi | |||||
| împotriva | |||||
| în | |||||
| înainte | |||||
| înaintea | |||||
| încât | |||||
| încît | |||||
| încotro | |||||
| între | |||||
| întrucât | |||||
| întrucît | |||||
| îţi | |||||
| la | |||||
| lângă | |||||
| le | |||||
| li | |||||
| lîngă | |||||
| lor | |||||
| lui | |||||
| mă | |||||
| mâine | |||||
| mea | |||||
| mei | |||||
| mele | |||||
| mereu | |||||
| meu | |||||
| mi | |||||
| mine | |||||
| mult | |||||
| multă | |||||
| mulţi | |||||
| ne | |||||
| nicăieri | |||||
| nici | |||||
| nimeni | |||||
| nişte | |||||
| noastră | |||||
| noastre | |||||
| noi | |||||
| noştri | |||||
| nostru | |||||
| nu | |||||
| ori | |||||
| oricând | |||||
| oricare | |||||
| oricât | |||||
| orice | |||||
| oricînd | |||||
| oricine | |||||
| oricît | |||||
| oricum | |||||
| oriunde | |||||
| până | |||||
| pe | |||||
| pentru | |||||
| peste | |||||
| pînă | |||||
| poate | |||||
| pot | |||||
| prea | |||||
| prima | |||||
| primul | |||||
| prin | |||||
| printr | |||||
| sa | |||||
| să | |||||
| săi | |||||
| sale | |||||
| sau | |||||
| său | |||||
| se | |||||
| şi | |||||
| sînt | |||||
| sîntem | |||||
| sînteţi | |||||
| spre | |||||
| sub | |||||
| sunt | |||||
| suntem | |||||
| sunteţi | |||||
| ta | |||||
| tăi | |||||
| tale | |||||
| tău | |||||
| te | |||||
| ţi | |||||
| ţie | |||||
| tine | |||||
| toată | |||||
| toate | |||||
| tot | |||||
| toţi | |||||
| totuşi | |||||
| tu | |||||
| un | |||||
| una | |||||
| unde | |||||
| undeva | |||||
| unei | |||||
| unele | |||||
| uneori | |||||
| unor | |||||
| vă | |||||
| vi | |||||
| voastră | |||||
| voastre | |||||
| voi | |||||
| voştri | |||||
| vostru | |||||
| vouă | |||||
| vreo | |||||
| vreun |
| | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt | |||||
| | This file is distributed under the BSD License. | |||||
| | See http://snowball.tartarus.org/license.php | |||||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| | - Encoding was converted to UTF-8. | |||||
| | - This notice was added. | |||||
| | | |||||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| | A Russian stop word list. Comments begin with vertical bar. Each stop | |||||
| | word is at the start of a line. | |||||
| | This is a ranked list (commonest to rarest) of stopwords derived from | |||||
| | a large text sample. | |||||
| | Letter `ё' is translated to `е'. | |||||
| и | and | |||||
| в | in/into | |||||
| во | alternative form | |||||
| не | not | |||||
| что | what/that | |||||
| он | he | |||||
| на | on/onto | |||||
| я | i | |||||
| с | from | |||||
| со | alternative form | |||||
| как | how | |||||
| а | milder form of `no' (but) | |||||
| то | conjunction and form of `that' | |||||
| все | all | |||||
| она | she | |||||
| так | so, thus | |||||
| его | him | |||||
| но | but | |||||
| да | yes/and | |||||
| ты | thou | |||||
| к | towards, by | |||||
| у | around, chez | |||||
| же | intensifier particle | |||||
| вы | you | |||||
| за | beyond, behind | |||||
| бы | conditional/subj. particle | |||||
| по | up to, along | |||||
| только | only | |||||
| ее | her | |||||
| мне | to me | |||||
| было | it was | |||||
| вот | here is/are, particle | |||||
| от | away from | |||||
| меня | me | |||||
| еще | still, yet, more | |||||
| нет | no, there isn't/aren't | |||||
| о | about | |||||
| из | out of | |||||
| ему | to him | |||||
| теперь | now | |||||
| когда | when | |||||
| даже | even | |||||
| ну | so, well | |||||
| вдруг | suddenly | |||||
| ли | interrogative particle | |||||
| если | if | |||||
| уже | already, but homonym of `narrower' | |||||
| или | or | |||||
| ни | neither | |||||
| быть | to be | |||||
| был | he was | |||||
| него | prepositional form of его | |||||
| до | up to | |||||
| вас | you accusative | |||||
| нибудь | indef. suffix preceded by hyphen | |||||
| опять | again | |||||
| уж | already, but homonym of `adder' | |||||
| вам | to you | |||||
| сказал | he said | |||||
| ведь | particle `after all' | |||||
| там | there | |||||
| потом | then | |||||
| себя | oneself | |||||
| ничего | nothing | |||||
| ей | to her | |||||
| может | usually with `быть' as `maybe' | |||||
| они | they | |||||
| тут | here | |||||
| где | where | |||||
| есть | there is/are | |||||
| надо | got to, must | |||||
| ней | prepositional form of ей | |||||
| для | for | |||||
| мы | we | |||||
| тебя | thee | |||||
| их | them, their | |||||
| чем | than | |||||
| была | she was | |||||
| сам | self | |||||
| чтоб | in order to | |||||
| без | without | |||||
| будто | as if | |||||
| человек | man, person, one | |||||
| чего | genitive form of `what' | |||||
| раз | once | |||||
| тоже | also | |||||
| себе | to oneself | |||||
| под | beneath | |||||
| жизнь | life | |||||
| будет | will be | |||||
| ж | short form of intensifier particle `же' | |||||
| тогда | then | |||||
| кто | who | |||||
| этот | this | |||||
| говорил | was saying | |||||
| того | genitive form of `that' | |||||
| потому | for that reason | |||||
| этого | genitive form of `this' | |||||
| какой | which | |||||
| совсем | altogether | |||||
| ним | prepositional form of `его', `они' | |||||
| здесь | here | |||||
| этом | prepositional form of `этот' | |||||
| один | one | |||||
| почти | almost | |||||
| мой | my | |||||
| тем | instrumental/dative plural of `тот', `то' | |||||
| чтобы | full form of `in order that' | |||||
| нее | her (acc.) | |||||
| кажется | it seems | |||||
| сейчас | now | |||||
| были | they were | |||||
| куда | where to | |||||
| зачем | why | |||||
| сказать | to say | |||||
| всех | all (acc., gen. preposn. plural) | |||||
| никогда | never | |||||
| сегодня | today | |||||
| можно | possible, one can | |||||
| при | by | |||||
| наконец | finally | |||||
| два | two | |||||
| об | alternative form of `о', about | |||||
| другой | another | |||||
| хоть | even | |||||
| после | after | |||||
| над | above | |||||
| больше | more | |||||
| тот | that one (masc.) | |||||
| через | across, in | |||||
| эти | these | |||||
| нас | us | |||||
| про | about | |||||
| всего | in all, only, of all | |||||
| них | prepositional form of `они' (they) | |||||
| какая | which, feminine | |||||
| много | lots | |||||
| разве | interrogative particle | |||||
| сказала | she said | |||||
| три | three | |||||
| эту | this, acc. fem. sing. | |||||
| моя | my, feminine | |||||
| впрочем | moreover, besides | |||||
| хорошо | good | |||||
| свою | one's own, acc. fem. sing. | |||||
| этой | oblique form of `эта', fem. `this' | |||||
| перед | in front of | |||||
| иногда | sometimes | |||||
| лучше | better | |||||
| чуть | a little | |||||
| том | preposn. form of `that one' | |||||
| нельзя | one must not | |||||
| такой | such a one | |||||
| им | to them | |||||
| более | more | |||||
| всегда | always | |||||
| конечно | of course | |||||
| всю | acc. fem. sing of `all' | |||||
| между | between | |||||
| | b: some paradigms | |||||
| | | |||||
| | personal pronouns | |||||
| | | |||||
| | я меня мне мной [мною] | |||||
| | ты тебя тебе тобой [тобою] | |||||
| | он его ему им [него, нему, ним] | |||||
| | она ее ей ею [нее, ней, нею] | |||||
| | оно его ему им [него, нему, ним] | |||||
| | | |||||
| | мы нас нам нами | |||||
| | вы вас вам вами | |||||
| | они их им ими [них, ним, ними] | |||||
| | | |||||
| | себя себе собой [собою] | |||||
| | | |||||
| | demonstrative pronouns: этот (this), тот (that) | |||||
| | | |||||
| | этот эта это эти | |||||
| | этого эту это эти | |||||
| | этого этой этого этих | |||||
| | этому этой этому этим | |||||
| | этим этой этим [этою] этими | |||||
| | этом этой этом этих | |||||
| | | |||||
| | тот та то те | |||||
| | того ту то те | |||||
| | того той того тех | |||||
| | тому той тому тем | |||||
| | тем той тем [тою] теми | |||||
| | том той том тех | |||||
| | | |||||
| | determinative pronouns | |||||
| | | |||||
| | (a) весь (all) | |||||
| | | |||||
| | весь вся все все | |||||
| | всего всю все все | |||||
| | всего всей всего всех | |||||
| | всему всей всему всем | |||||
| | всем всей всем [всею] всеми | |||||
| | всем всей всем всех | |||||
| | | |||||
| | (b) сам (himself etc) | |||||
| | | |||||
| | сам сама само сами | |||||
| | самого саму само самих | |||||
| | самого самой самого самих | |||||
| | самому самой самому самим | |||||
| | самим самой самим [самою] самими | |||||
| | самом самой самом самих | |||||
| | | |||||
| | stems of verbs `to be', `to have', `to do' and modal | |||||
| | | |||||
| | быть бы буд быв есть суть | |||||
| | име | |||||
| | дел | |||||
| | мог мож мочь | |||||
| | уме | |||||
| | хоч хот | |||||
| | долж | |||||
| | можн | |||||
| | нужн | |||||
| | нельзя | |||||
| | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt | |||||
| | This file is distributed under the BSD License. | |||||
| | See http://snowball.tartarus.org/license.php | |||||
| | Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| | - Encoding was converted to UTF-8. | |||||
| | - This notice was added. | |||||
| | | |||||
| | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| | A Swedish stop word list. Comments begin with vertical bar. Each stop | |||||
| | word is at the start of a line. | |||||
| | This is a ranked list (commonest to rarest) of stopwords derived from | |||||
| | a large text sample. | |||||
| | Swedish stop words occasionally exhibit homonym clashes. For example | |||||
| | så = so, but also seed. These are indicated clearly below. | |||||
| och | and | |||||
| det | it, this/that | |||||
| att | to (with infinitive) | |||||
| i | in, at | |||||
| en | a | |||||
| jag | I | |||||
| hon | she | |||||
| som | who, that | |||||
| han | he | |||||
| på | on | |||||
| den | it, this/that | |||||
| med | with | |||||
| var | where, each | |||||
| sig | him(self) etc | |||||
| för | for | |||||
| så | so (also: seed) | |||||
| till | to | |||||
| är | is | |||||
| men | but | |||||
| ett | a | |||||
| om | if; around, about | |||||
| hade | had | |||||
| de | they, these/those | |||||
| av | of | |||||
| icke | not, no | |||||
| mig | me | |||||
| du | you | |||||
| henne | her | |||||
| då | then, when | |||||
| sin | his | |||||
| nu | now | |||||
| har | have | |||||
| inte | inte någon = no one | |||||
| hans | his | |||||
| honom | him | |||||
| skulle | would, should | |||||
| hennes | her | |||||
| där | there | |||||
| min | my | |||||
| man | one (pronoun) | |||||
| ej | nor | |||||
| vid | at, by, on (also: vast) | |||||
| kunde | could | |||||
| något | some etc | |||||
| från | from, off | |||||
| ut | out | |||||
| när | when | |||||
| efter | after, behind | |||||
| upp | up | |||||
| vi | we | |||||
| dem | them | |||||
| vara | be | |||||
| vad | what | |||||
| över | over | |||||
| än | than | |||||
| dig | you | |||||
| kan | can | |||||
| sina | his | |||||
| här | here | |||||
| ha | have | |||||
| mot | towards | |||||
| alla | all | |||||
| under | under (also: wonder) | |||||
| någon | some etc | |||||
| eller | or (else) | |||||
| allt | all | |||||
| mycket | much | |||||
| sedan | since | |||||
| ju | why | |||||
| denna | this/that | |||||
| själv | myself, yourself etc | |||||
| detta | this/that | |||||
| åt | to | |||||
| utan | without | |||||
| varit | been | |||||
| hur | how | |||||
| ingen | no | |||||
| mitt | my | |||||
| ni | you | |||||
| bli | to be, become | |||||
| blev | from bli | |||||
| oss | us | |||||
| din | thy | |||||
| dessa | these/those | |||||
| några | some etc | |||||
| deras | their | |||||
| blir | from bli | |||||
| mina | my | |||||
| samma | (the) same | |||||
| vilken | who, that | |||||
| er | you, your | |||||
| sådan | such a | |||||
| vår | our | |||||
| blivit | from bli | |||||
| dess | its | |||||
| inom | within | |||||
| mellan | between | |||||
| sådant | such a | |||||
| varför | why | |||||
| varje | each | |||||
| vilka | who, that | |||||
| ditt | thy | |||||
| vem | who | |||||
| vilket | who, that | |||||
| sitta | his | |||||
| sådana | such a | |||||
| vart | each | |||||
| dina | thy | |||||
| vars | whose | |||||
| vårt | our | |||||
| våra | our | |||||
| ert | your | |||||
| era | your | |||||
| vilkas | whose | |||||
| # Thai stopwords from: | |||||
| # "Opinion Detection in Thai Political News Columns | |||||
| # Based on Subjectivity Analysis" | |||||
| # Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak | |||||
| ไว้ | |||||
| ไม่ | |||||
| ไป | |||||
| ได้ | |||||
| ให้ | |||||
| ใน | |||||
| โดย | |||||
| แห่ง | |||||
| แล้ว | |||||
| และ | |||||
| แรก | |||||
| แบบ | |||||
| แต่ | |||||
| เอง | |||||
| เห็น | |||||
| เลย | |||||
| เริ่ม | |||||
| เรา | |||||
| เมื่อ | |||||
| เพื่อ | |||||
| เพราะ | |||||
| เป็นการ | |||||
| เป็น | |||||
| เปิดเผย | |||||
| เปิด | |||||
| เนื่องจาก | |||||
| เดียวกัน | |||||
| เดียว | |||||
| เช่น | |||||
| เฉพาะ | |||||
| เคย | |||||
| เข้า | |||||
| เขา | |||||
| อีก | |||||
| อาจ | |||||
| อะไร | |||||
| ออก | |||||
| อย่าง | |||||
| อยู่ | |||||
| อยาก | |||||
| หาก | |||||
| หลาย | |||||
| หลังจาก | |||||
| หลัง | |||||
| หรือ | |||||
| หนึ่ง | |||||
| ส่วน | |||||
| ส่ง | |||||
| สุด | |||||
| สําหรับ | |||||
| ว่า | |||||
| วัน | |||||
| ลง | |||||
| ร่วม | |||||
| ราย | |||||
| รับ | |||||
| ระหว่าง | |||||
| รวม | |||||
| ยัง | |||||
| มี | |||||
| มาก | |||||
| มา | |||||
| พร้อม | |||||
| พบ | |||||
| ผ่าน | |||||
| ผล | |||||
| บาง | |||||
| น่า | |||||
| นี้ | |||||
| นํา | |||||
| นั้น | |||||
| นัก | |||||
| นอกจาก | |||||
| ทุก | |||||
| ที่สุด | |||||
| ที่ | |||||
| ทําให้ | |||||
| ทํา | |||||
| ทาง | |||||
| ทั้งนี้ | |||||
| ทั้ง | |||||
| ถ้า | |||||
| ถูก | |||||
| ถึง | |||||
| ต้อง | |||||
| ต่างๆ | |||||
| ต่าง | |||||
| ต่อ | |||||
| ตาม | |||||
| ตั้งแต่ | |||||
| ตั้ง | |||||
| ด้าน | |||||
| ด้วย | |||||
| ดัง | |||||
| ซึ่ง | |||||
| ช่วง | |||||
| จึง | |||||
| จาก | |||||
| จัด | |||||
| จะ | |||||
| คือ | |||||
| ความ | |||||
| ครั้ง | |||||
| คง | |||||
| ขึ้น | |||||
| ของ | |||||
| ขอ | |||||
| ขณะ | |||||
| ก่อน | |||||
| ก็ | |||||
| การ | |||||
| กับ | |||||
| กัน | |||||
| กว่า | |||||
| กล่าว |
| # Turkish stopwords from LUCENE-559 | |||||
| # merged with the list from "Information Retrieval on Turkish Texts" | |||||
| # (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) | |||||
| acaba | |||||
| altmış | |||||
| altı | |||||
| ama | |||||
| ancak | |||||
| arada | |||||
| aslında | |||||
| ayrıca | |||||
| bana | |||||
| bazı | |||||
| belki | |||||
| ben | |||||
| benden | |||||
| beni | |||||
| benim | |||||
| beri | |||||
| beş | |||||
| bile | |||||
| bin | |||||
| bir | |||||
| birçok | |||||
| biri | |||||
| birkaç | |||||
| birkez | |||||
| birşey | |||||
| birşeyi | |||||
| biz | |||||
| bize | |||||
| bizden | |||||
| bizi | |||||
| bizim | |||||
| böyle | |||||
| böylece | |||||
| bu | |||||
| buna | |||||
| bunda | |||||
| bundan | |||||
| bunlar | |||||
| bunları | |||||
| bunların | |||||
| bunu | |||||
| bunun | |||||
| burada | |||||
| çok | |||||
| çünkü | |||||
| da | |||||
| daha | |||||
| dahi | |||||
| de | |||||
| defa | |||||
| değil | |||||
| diğer | |||||
| diye | |||||
| doksan | |||||
| dokuz | |||||
| dolayı | |||||
| dolayısıyla | |||||
| dört | |||||
| edecek | |||||
| eden | |||||
| ederek | |||||
| edilecek | |||||
| ediliyor | |||||
| edilmesi | |||||
| ediyor | |||||
| eğer | |||||
| elli | |||||
| en | |||||
| etmesi | |||||
| etti | |||||
| ettiği | |||||
| ettiğini | |||||
| gibi | |||||
| göre | |||||
| halen | |||||
| hangi | |||||
| hatta | |||||
| hem | |||||
| henüz | |||||
| hep | |||||
| hepsi | |||||
| her | |||||
| herhangi | |||||
| herkesin | |||||
| hiç | |||||
| hiçbir | |||||
| için | |||||
| iki | |||||
| ile | |||||
| ilgili | |||||
| ise | |||||
| işte | |||||
| itibaren | |||||
| itibariyle | |||||
| kadar | |||||
| karşın | |||||
| katrilyon | |||||
| kendi | |||||
| kendilerine | |||||
| kendini | |||||
| kendisi | |||||
| kendisine | |||||
| kendisini | |||||
| kez | |||||
| ki | |||||
| kim | |||||
| kimden | |||||
| kime | |||||
| kimi | |||||
| kimse | |||||
| kırk | |||||
| milyar | |||||
| milyon | |||||
| mu | |||||
| mü | |||||
| mı | |||||
| nasıl | |||||
| ne | |||||
| neden | |||||
| nedenle | |||||
| nerde | |||||
| nerede | |||||
| nereye | |||||
| niye | |||||
| niçin | |||||
| o | |||||
| olan | |||||
| olarak | |||||
| oldu | |||||
| olduğu | |||||
| olduğunu | |||||
| olduklarını | |||||
| olmadı | |||||
| olmadığı | |||||
| olmak | |||||
| olması | |||||
| olmayan | |||||
| olmaz | |||||
| olsa | |||||
| olsun | |||||
| olup | |||||
| olur | |||||
| olursa | |||||
| oluyor | |||||
| on | |||||
| ona | |||||
| ondan | |||||
| onlar | |||||
| onlardan | |||||
| onları | |||||
| onların | |||||
| onu | |||||
| onun | |||||
| otuz | |||||
| oysa | |||||
| öyle | |||||
| pek | |||||
| rağmen | |||||
| sadece | |||||
| sanki | |||||
| sekiz | |||||
| seksen | |||||
| sen | |||||
| senden | |||||
| seni | |||||
| senin | |||||
| siz | |||||
| sizden | |||||
| sizi | |||||
| sizin | |||||
| şey | |||||
| şeyden | |||||
| şeyi | |||||
| şeyler | |||||
| şöyle | |||||
| şu | |||||
| şuna | |||||
| şunda | |||||
| şundan | |||||
| şunları | |||||
| şunu | |||||
| tarafından | |||||
| trilyon | |||||
| tüm | |||||
| üç | |||||
| üzere | |||||
| var | |||||
| vardı | |||||
| ve | |||||
| veya | |||||
| ya | |||||
| yani | |||||
| yapacak | |||||
| yapılan | |||||
| yapılması | |||||
| yapıyor | |||||
| yapmak | |||||
| yaptı | |||||
| yaptığı | |||||
| yaptığını | |||||
| yaptıkları | |||||
| yedi | |||||
| yerine | |||||
| yetmiş | |||||
| yine | |||||
| yirmi | |||||
| yoksa | |||||
| yüz | |||||
| zaten |
| # | |||||
| # This is a sample user dictionary for Kuromoji (JapaneseTokenizer) | |||||
| # | |||||
| # Add entries to this file in order to override the statistical model in terms | |||||
| # of segmentation, readings and part-of-speech tags. Notice that entries do | |||||
| # not have weights since they are always used when found. This is by-design | |||||
| # in order to maximize ease-of-use. | |||||
| # | |||||
| # Entries are defined using the following CSV format: | |||||
| # <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> | |||||
| # | |||||
| # Notice that a single half-width space separates tokens and readings, and | |||||
| # that the number of tokens and readings must match exactly. | |||||
| # | |||||
| # Also notice that the behavior of multiple entries with the same <text> is undefined. | |||||
| # | |||||
| # Whitespace only lines are ignored. Comments are not allowed on entry lines. | |||||
| # | |||||
| # Custom segmentation for kanji compounds | |||||
| 日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 | |||||
| 関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 | |||||
| # Custom segmentation for compound katakana | |||||
| トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 | |||||
| ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 | |||||
| # Custom reading for former sumo wrestler | |||||
| 朝青龍,朝青龍,アサショウリュウ,カスタム人名 |
| {"params":{ | |||||
| "query":{ | |||||
| "defType":"edismax", | |||||
| "q.alt":"*:*", | |||||
| "rows":"10", | |||||
| "fl":"*,score", | |||||
| "":{"v":0}}, | |||||
| "facets":{ | |||||
| "facet":"on", | |||||
| "facet.mincount":"1", | |||||
| "f.doc_type.facet.mincount":"0", | |||||
| "facet.field":["text_shingles","{!ex=type}doc_type", "language"], | |||||
| "f.text_shingles.facet.limit":10, | |||||
| "facet.query":"{!ex=type key=all_types}*:*", | |||||
| "f.doc_type.facet.missing":true, | |||||
| "":{"v":0}}, | |||||
| "browse":{ | |||||
| "type_fq":"{!field f=doc_type v=$type}", | |||||
| "hl":"on", | |||||
| "hl.fl":"content", | |||||
| "v.locale":"${locale}", | |||||
| "debug":"true", | |||||
| "hl.simple.pre":"HL_START", | |||||
| "hl.simple.post":"HL_END", | |||||
| "echoParams": "explicit", | |||||
| "_appends_": { | |||||
| "fq": "{!switch v=$type tag=type case='*:*' case.all='*:*' case.unknown='-doc_type:[* TO *]' default=$type_fq}" | |||||
| }, | |||||
| "":{"v":0}}, | |||||
| "velocity":{ | |||||
| "wt":"velocity", | |||||
| "v.template":"browse", | |||||
| "v.layout":"layout", | |||||
| "":{"v":0}}}} |
| # The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
| # (the "License"); you may not use this file except in compliance with | |||||
| # the License. You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| #----------------------------------------------------------------------- | |||||
| # Use a protected word file to protect against the stemmer reducing two | |||||
| # unrelated words to the same base word. | |||||
| # Some non-words that normally won't be encountered, | |||||
| # just to test that they won't be stemmed. | |||||
| dontstems | |||||
| zwhacky | |||||
| <?xml version="1.0" encoding="UTF-8"?> | |||||
| <!-- Solr managed schema - automatically generated - DO NOT EDIT --> | |||||
| <schema name="example-data-driven-schema" version="1.6"> | |||||
| <uniqueKey>id</uniqueKey> | |||||
| <fieldType name="ancestor_path" class="solr.TextField"> | |||||
| <analyzer type="index"> | |||||
| <tokenizer class="solr.KeywordTokenizerFactory"/> | |||||
| </analyzer> | |||||
| <analyzer type="query"> | |||||
| <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="binary" class="solr.BinaryField"/> | |||||
| <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> | |||||
| <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/> | |||||
| <fieldType name="currency" class="solr.CurrencyFieldType" amountLongSuffix="_l_ns" codeStrSuffix="_s_ns" defaultCurrency="USD" currencyConfig="currency.xml" /> | |||||
| <fieldType name="descendent_path" class="solr.TextField"> | |||||
| <analyzer type="index"> | |||||
| <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/> | |||||
| </analyzer> | |||||
| <analyzer type="query"> | |||||
| <tokenizer class="solr.KeywordTokenizerFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/> | |||||
| <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/> | |||||
| <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" geo="true" maxDistErr="0.001" distErrPct="0.025" distanceUnits="kilometers"/> | |||||
| <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.KeywordTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="phonetic_en" class="solr.TextField" indexed="true" stored="false"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="pdate" class="solr.DatePointField" docValues="true"/> | |||||
| <fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> | |||||
| <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> | |||||
| <fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> | |||||
| <fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/> | |||||
| <fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> | |||||
| <fieldType name="pint" class="solr.IntPointField" docValues="true"/> | |||||
| <fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> | |||||
| <fieldType name="plong" class="solr.LongPointField" docValues="true"/> | |||||
| <fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> | |||||
| <fieldType name="point" class="solr.PointType" subFieldSuffix="_d" dimension="2"/> | |||||
| <fieldType name="random" class="solr.RandomSortField" indexed="true"/> | |||||
| <fieldType name="string" class="solr.StrField" sortMissingLast="true"/> | |||||
| <fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true"/> | |||||
| <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_ar.txt" ignoreCase="true"/> | |||||
| <filter class="solr.ArabicNormalizationFilterFactory"/> | |||||
| <filter class="solr.ArabicStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_bg.txt" ignoreCase="true"/> | |||||
| <filter class="solr.BulgarianStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.ElisionFilterFactory" articles="lang/contractions_ca.txt" ignoreCase="true"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_ca.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.CJKWidthFilterFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.CJKBigramFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_cz.txt" ignoreCase="true"/> | |||||
| <filter class="solr.CzechStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_da.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Danish"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_de.txt" ignoreCase="true"/> | |||||
| <filter class="solr.GermanNormalizationFilterFactory"/> | |||||
| <filter class="solr.GermanLightStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.GreekLowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_el.txt" ignoreCase="false"/> | |||||
| <filter class="solr.GreekStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer type="index"> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.EnglishPossessiveFilterFactory"/> | |||||
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||||
| <filter class="solr.PorterStemFilterFactory"/> | |||||
| </analyzer> | |||||
| <analyzer type="query"> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.EnglishPossessiveFilterFactory"/> | |||||
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||||
| <filter class="solr.PorterStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_en_splitting" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100"> | |||||
| <analyzer type="index"> | |||||
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||||
| <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||||
| <filter class="solr.PorterStemFilterFactory"/> | |||||
| <filter class="solr.FlattenGraphFilterFactory" /> | |||||
| </analyzer> | |||||
| <analyzer type="query"> | |||||
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||||
| <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||||
| <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||||
| <filter class="solr.PorterStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_en_splitting_tight" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100"> | |||||
| <analyzer type="index"> | |||||
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||||
| <filter class="solr.SynonymGraphFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||||
| <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||||
| <filter class="solr.EnglishMinimalStemFilterFactory"/> | |||||
| <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |||||
| <filter class="solr.FlattenGraphFilterFactory" /> | |||||
| </analyzer> | |||||
| <analyzer type="query"> | |||||
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||||
| <filter class="solr.SynonymGraphFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||||
| <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||||
| <filter class="solr.EnglishMinimalStemFilterFactory"/> | |||||
| <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_es.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SpanishLightStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_eu.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Basque"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <charFilter class="solr.PersianCharFilterFactory"/> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.ArabicNormalizationFilterFactory"/> | |||||
| <filter class="solr.PersianNormalizationFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_fa.txt" ignoreCase="true"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fi.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.ElisionFilterFactory" articles="lang/contractions_fr.txt" ignoreCase="true"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fr.txt" ignoreCase="true"/> | |||||
| <filter class="solr.FrenchLightStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.ElisionFilterFactory" articles="lang/contractions_ga.txt" ignoreCase="true"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/hyphenations_ga.txt" ignoreCase="true"/> | |||||
| <filter class="solr.IrishLowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_ga.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Irish"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true"> | |||||
| <analyzer type="index"> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| </analyzer> | |||||
| <analyzer type="query"> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer type="index"> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.ReversedWildcardFilterFactory" maxPosQuestion="2" maxFractionAsterisk="0.33" maxPosAsterisk="3" withOriginal="true"/> | |||||
| </analyzer> | |||||
| <analyzer type="query"> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||||
| <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_gl.txt" ignoreCase="true"/> | |||||
| <filter class="solr.GalicianStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.IndicNormalizationFilterFactory"/> | |||||
| <filter class="solr.HindiNormalizationFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_hi.txt" ignoreCase="true"/> | |||||
| <filter class="solr.HindiStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_hu.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_hy.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_id.txt" ignoreCase="true"/> | |||||
| <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt" ignoreCase="true"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_it.txt" ignoreCase="true"/> | |||||
| <filter class="solr.ItalianLightStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_ja" class="solr.TextField" autoGeneratePhraseQueries="false" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> | |||||
| <filter class="solr.JapaneseBaseFormFilterFactory"/> | |||||
| <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt"/> | |||||
| <filter class="solr.CJKWidthFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_ja.txt" ignoreCase="true"/> | |||||
| <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/> | |||||
| <filter class="solr.KoreanPartOfSpeechStopFilterFactory" /> | |||||
| <filter class="solr.KoreanReadingFormFilterFactory" /> | |||||
| <filter class="solr.LowerCaseFilterFactory" /> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_lv.txt" ignoreCase="true"/> | |||||
| <filter class="solr.LatvianStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_nl.txt" ignoreCase="true"/> | |||||
| <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_no.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_pt.txt" ignoreCase="true"/> | |||||
| <filter class="solr.PortugueseLightStemFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_ro.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_ru.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Russian"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_sv.txt" ignoreCase="true"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.ThaiTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_th.txt" ignoreCase="true"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <filter class="solr.TurkishLowerCaseFilterFactory"/> | |||||
| <filter class="solr.StopFilterFactory" words="lang/stopwords_tr.txt" ignoreCase="false"/> | |||||
| <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_email_url" class="solr.TextField"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.UAX29URLEmailTokenizerFactory"/> | |||||
| <filter class="solr.TypeTokenFilterFactory" types="email_url_types.txt" useWhitelist="true"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <fieldType name="text_shingles" class="solr.TextField" positionIncrementGap="100" multiValued="true"> | |||||
| <analyzer type="index"> | |||||
| <tokenizer class="solr.StandardTokenizerFactory"/> | |||||
| <!-- <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="false" /> --> | |||||
| <filter class="solr.LengthFilterFactory" min="2" max="18"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| <filter class="solr.PatternReplaceFilterFactory" pattern="(^[^a-z]+$)" replacement="" replace="all"/> | |||||
| <filter class="solr.ShingleFilterFactory" minShingleSize="3" maxShingleSize="3" | |||||
| outputUnigrams="false" outputUnigramsIfNoShingles="false" tokenSeparator=" " fillerToken="*"/> | |||||
| <filter class="solr.PatternReplaceFilterFactory" pattern="(.*[\*].*)" replacement=""/> | |||||
| <filter class="solr.TrimFilterFactory"/> | |||||
| <!-- PRFF could have removed everything down to an empty string, remove if so --> | |||||
| <filter class="solr.LengthFilterFactory" min="1" max="100"/> | |||||
| </analyzer> | |||||
| <analyzer type="query"> | |||||
| <tokenizer class="solr.KeywordTokenizerFactory"/> | |||||
| <filter class="solr.LowerCaseFilterFactory"/> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <field name="id" type="string" multiValued="false" indexed="true" required="true" stored="true"/> | |||||
| <field name="_version_" type="plong" indexed="true" stored="true"/> | |||||
| <field name="content_type" type="string" indexed="true" stored="true"/> | |||||
| <field name="doc_type" type="string" indexed="true" stored="true"/> | |||||
| <field name="title" type="string" indexed="true" stored="true"/> | |||||
| <field name="language" type="string" indexed="true" stored="true"/> | |||||
| <field name="content" type="text_general" multiValued="false" indexed="true" stored="true"/> | |||||
| <field name="text_shingles" type="text_shingles" indexed="true" stored="false"/> | |||||
| <field name="_text_" type="text_general" multiValued="true" indexed="true" stored="false"/> | |||||
| <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_coordinate" type="pdouble" indexed="true" stored="false"/> | |||||
| <dynamicField name="ignored_*" type="ignored" multiValued="true"/> | |||||
| <dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_cjk" type="text_cjk" indexed="true" stored="true"/> | |||||
| <dynamicField name="random_*" type="random"/> | |||||
| <dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_ar" type="text_ar" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_bg" type="text_bg" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_ca" type="text_ca" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_cz" type="text_cz" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_da" type="text_da" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_de" type="text_de" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_el" type="text_el" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_es" type="text_es" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_eu" type="text_eu" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_fa" type="text_fa" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_fi" type="text_fi" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_fr" type="text_fr" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_ga" type="text_ga" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_gl" type="text_gl" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_hi" type="text_hi" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_hu" type="text_hu" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_hy" type="text_hy" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_id" type="text_id" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_it" type="text_it" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_ja" type="text_ja" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_nl" type="text_nl" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_no" type="text_no" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_ro" type="text_ro" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_ru" type="text_ru" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_sv" type="text_sv" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_th" type="text_th" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_txt_tr" type="text_tr" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_point" type="point" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_srpt" type="location_rpt" indexed="true" stored="true"/> | |||||
| <dynamicField name="attr_*" type="text_general" multiValued="true" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_l_ns" type="plong" indexed="true" stored="false"/> | |||||
| <dynamicField name="*_s_ns" type="string" indexed="true" stored="false"/> | |||||
| <dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_dts" type="pdate" multiValued="true" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_is" type="pints" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_ss" type="strings" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_ls" type="plongs" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_fs" type="pfloats" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_ds" type="pdoubles" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_i" type="pint" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_s" type="string" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_l" type="plong" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_f" type="pfloat" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_d" type="pdouble" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_p" type="location" indexed="true" stored="true"/> | |||||
| <dynamicField name="*_c" type="currency" indexed="true" stored="true"/> | |||||
| <copyField source="content" dest="text_shingles"/> | |||||
| <copyField source="*" dest="_text_"/> | |||||
| <!-- ADDED BY SIMON BOWIE 2022-04-04 --> | |||||
| <copyField source="content" dest="year"/> | |||||
| <field name="year" type="year" indexed="true" stored="true"/> | |||||
| <fieldType name="year" class="solr.TextField" positionIncrementGap="100"> | |||||
| <analyzer> | |||||
| <tokenizer class="solr.PatternTokenizerFactory" pattern="=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})" group="1" /> | |||||
| </analyzer> | |||||
| </fieldType> | |||||
| <!-- END --> | |||||
| </schema> |
| # Licensed to the Apache Software Foundation (ASF) under one or more | |||||
| # contributor license agreements. See the NOTICE file distributed with | |||||
| # this work for additional information regarding copyright ownership. | |||||
| # The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
| # (the "License"); you may not use this file except in compliance with | |||||
| # the License. You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
| # (the "License"); you may not use this file except in compliance with | |||||
| # the License. You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| #----------------------------------------------------------------------- | |||||
| #some test synonym mappings unlikely to appear in real input text | |||||
| aaafoo => aaabar | |||||
| bbbfoo => bbbfoo bbbbar | |||||
| cccfoo => cccbar cccbaz | |||||
| fooaaa,baraaa,bazaaa | |||||
| # Some synonym groups specific to this example | |||||
| GB,gib,gigabyte,gigabytes | |||||
| MB,mib,megabyte,megabytes | |||||
| Television, Televisions, TV, TVs | |||||
| #notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming | |||||
| #after us won't split it into two words. | |||||
| # Synonym mappings can be used for spelling correction too | |||||
| pixima => pixma | |||||
| function get_class(name) { | |||||
| var clazz; | |||||
| try { | |||||
| // Java8 Nashorn | |||||
| clazz = eval("Java.type(name).class"); | |||||
| } catch(e) { | |||||
| // Java7 Rhino | |||||
| clazz = eval("Packages."+name); | |||||
| } | |||||
| return clazz; | |||||
| } | |||||
| function processAdd(cmd) { | |||||
| doc = cmd.solrDoc; // org.apache.solr.common.SolrInputDocument | |||||
| var id = doc.getFieldValue("id"); | |||||
| logger.info("update-script#processAdd: id=" + id); | |||||
| // The idea here is to use the file's content_type value to | |||||
| // simplify into user-friendly values, such that types of, say, image/jpeg and image/tiff | |||||
| // are in an "Images" facet | |||||
| var ct = doc.getFieldValue("content_type"); | |||||
| if (ct) { | |||||
| // strip off semicolon onward | |||||
| var semicolon_index = ct.indexOf(';'); | |||||
| if (semicolon_index != -1) { | |||||
| ct = ct.substring(0,semicolon_index); | |||||
| } | |||||
| // and split type/subtype | |||||
| var ct_type = ct.substring(0,ct.indexOf('/')); | |||||
| var ct_subtype = ct.substring(ct.indexOf('/')+1); | |||||
| var doc_type; | |||||
| switch(true) { | |||||
| case /^application\/rtf/.test(ct) || /wordprocessing/.test(ct): | |||||
| doc_type = "doc"; | |||||
| break; | |||||
| case /html/.test(ct): | |||||
| doc_type = "html"; | |||||
| break; | |||||
| case /^image\/.*/.test(ct): | |||||
| doc_type = "image"; | |||||
| break; | |||||
| case /presentation|powerpoint/.test(ct): | |||||
| doc_type = "presentation"; | |||||
| break; | |||||
| case /spreadsheet|excel/.test(ct): | |||||
| doc_type = "spreadsheet"; | |||||
| break; | |||||
| case /^application\/pdf/.test(ct): | |||||
| doc_type = "pdf"; | |||||
| break; | |||||
| case /^text\/plain/.test(ct): | |||||
| doc_type = "text" | |||||
| break; | |||||
| default: | |||||
| break; | |||||
| } | |||||
| // TODO: error handling needed? What if there is no slash? | |||||
| if(doc_type) { doc.setField("doc_type", doc_type); } | |||||
| doc.setField("content_type_type_s", ct_type); | |||||
| doc.setField("content_type_subtype_s", ct_subtype); | |||||
| } | |||||
| var content = doc.getFieldValue("content"); | |||||
| if (!content) { | |||||
| return; //No content found, so we are done here | |||||
| } | |||||
| var analyzer = | |||||
| req.getCore().getLatestSchema() | |||||
| .getFieldTypeByName("text_email_url") | |||||
| .getIndexAnalyzer(); | |||||
| var token_stream = | |||||
| analyzer.tokenStream("content", content); | |||||
| var term_att = token_stream.getAttribute(get_class("org.apache.lucene.analysis.tokenattributes.CharTermAttribute")); | |||||
| var type_att = token_stream.getAttribute(get_class("org.apache.lucene.analysis.tokenattributes.TypeAttribute")); | |||||
| token_stream.reset(); | |||||
| while (token_stream.incrementToken()) { | |||||
| doc.addField(type_att.type().replace(/\<|\>/g,'').toLowerCase()+"_ss", term_att.toString()); | |||||
| } | |||||
| token_stream.end(); | |||||
| token_stream.close(); | |||||
| } | |||||
| function processDelete(cmd) { | |||||
| // no-op: intentionally empty, cmd is ignored | |||||
| } | |||||
| function processMergeIndexes(cmd) { | |||||
| // no-op: intentionally empty, cmd is ignored | |||||
| } | |||||
| function processCommit(cmd) { | |||||
| // no-op: intentionally empty, cmd is ignored | |||||
| } | |||||
| function processRollback(cmd) { | |||||
| // no-op: intentionally empty, cmd is ignored | |||||
| } | |||||
| function finish() { | |||||
| // no-op: intentionally empty, nothing to clean up in this script | |||||
| } | ||||| |
| <div id="query-box"> | |||||
| <form id="query-form" action="#{url_for_home}" method="GET"> | |||||
| $resource.find: | |||||
| <input type="text" id="q" name="q" style="width: 50%" value="$!esc.html($request.params.get('q'))"/> | |||||
| <input type="submit" value="$resource.submit"/> | |||||
| <div id="debug_query" class="debug"> | |||||
| <span id="parsed_query">$esc.html($response.response.debug.parsedquery)</span> | |||||
| </div> | |||||
| <input type="hidden" name="type" value="#current_type"/> | |||||
| #if("#current_locale"!="")<input type="hidden" name="locale" value="#current_locale"/>#end | |||||
| #foreach($fq in $response.responseHeader.params.getAll("fq")) | |||||
| <input type="hidden" name="fq" id="allFQs" value="$esc.html($fq)"/> | |||||
| #end | |||||
| </form> | |||||
| <div id="constraints"> | |||||
| #foreach($fq in $response.responseHeader.params.getAll("fq")) | |||||
| #set($previous_fq_count=$velocityCount - 1) | |||||
| #if($fq != '') | |||||
| > $fq<a href="#url_for_filters($response.responseHeader.params.fq.subList(0,$previous_fq_count))">x</a> | |||||
| #end | |||||
| #end | |||||
| </div> | |||||
| </div> | |||||
| <div id="browse_results"> | |||||
| #parse("results.vm") | |||||
| </div> | |||||
| ## intentionally empty | |||||
| <div id="facet_$field.name"> | |||||
| <span class="facet-field">$resource.facet.top_phrases</span><br/> | |||||
| <ul id="tagcloud"> | |||||
| #foreach($facet in $sort.sort($field.values,"name")) | |||||
| <li data-weight="$math.mul($facet.count,1)"> | |||||
| <a href="#url_for_facet_filter($field.name, $facet.name)">$facet.name</a> | |||||
| </li> | |||||
| #end | |||||
| </ul> | |||||
| </div> |
| #if($response.facetFields.size() > 0) | |||||
| #foreach($field in $response.facetFields) | |||||
| #if($field.values.size() > 0) | |||||
| #if($engine.resourceExists("facet_${field.name}.vm")) | |||||
| #parse("facet_${field.name}.vm") | |||||
| #else | |||||
| <div id="facet_$field.name" class="facet_field"> | |||||
| <span class="facet-field">#label("facet.${field.name}",$field.name)</span><br/> | |||||
| <ul> | |||||
| #foreach($facet in $field.values) | |||||
| <li><a href="#url_for_facet_filter($field.name, $facet.name)">#if($facet.name!=$null)#label("${field.name}.${facet.name}","${field.name}.${facet.name}")#else<em>missing</em>#end</a> ($facet.count)</li> | |||||
| #end | |||||
| </ul> | |||||
| </div> | |||||
| #end | |||||
| #end | |||||
| #end ## end if field.values > 0 | |||||
| #end ## end if facetFields > 0 | |||||
| <hr/> | |||||
| <div> | |||||
| <div id="admin"><a href="#url_root/index.html#/#{core_name}">Solr Admin</a></div> | |||||
| <a href="#" onclick='jQuery(".debug").toggle(); return false;'>toggle debug mode</a> | |||||
| <a href="#url_for_lens&wt=xml#if($debug)&debug=true#end">XML results</a> ## TODO: Add links for other formats, maybe dynamically? | |||||
| </div> | |||||
| <div> | |||||
| <a href="http://lucene.apache.org/solr">Solr Home Page</a> | |||||
| </div> | |||||
| <div class="debug"> | |||||
| <hr/> | |||||
| Request: | |||||
| <pre> | |||||
| $esc.html($request) | |||||
| </pre> | |||||
| <hr/> | |||||
| Debug: | |||||
| <pre> | |||||
| $esc.html($response.response.debug) | |||||
| </pre> | |||||
| </div> |
| <title>Solr browse: #core_name</title> | |||||
| <meta http-equiv="content-type" content="text/html; charset=UTF-8"/> | |||||
| <link rel="icon" type="image/x-icon" href="#{url_root}/img/favicon.ico"/> | |||||
| <link rel="shortcut icon" type="image/x-icon" href="#{url_root}/img/favicon.ico"/> | |||||
| <script type="text/javascript" src="#{url_root}/libs/jquery-3.4.1.min.js"></script> | |||||
| <script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/js/jquery.tx3-tag-cloud.js&contentType=text/javascript"></script> | |||||
| <script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/js/dropit.js&contentType=text/javascript"></script> | |||||
| <script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/js/jquery.autocomplete.js&contentType=text/javascript"></script> | |||||
| <script type="text/javascript"> | |||||
| $(document).ready(function() { | |||||
| $("#tagcloud").tx3TagCloud({ | |||||
| multiplier: 1 | |||||
| }); | |||||
| $('.menu').dropit(); | |||||
| $( document ).ajaxComplete(function() { | |||||
| $("#tagcloud").tx3TagCloud({ | |||||
| multiplier: 5 | |||||
| }); | |||||
| }); | |||||
| $('\#q').keyup(function() { | |||||
| $('#browse_results').load('#{url_for_home}?#lensNoQ&v.layout.enabled=false&v.template=results&q='+encodeURI($('\#q').val())); | |||||
| $("\#q").autocomplete('#{url_for_solr}/suggest', { | |||||
| extraParams: { | |||||
| 'suggest.q': function() { return $("\#q").val();}, | |||||
| 'suggest.build': 'true', | |||||
| 'wt': 'json', | |||||
| } | |||||
| }).keydown(function(e) { | |||||
| if (e.keyCode === 13){ | |||||
| $("#query-form").trigger('submit'); | |||||
| } | |||||
| }); | |||||
| }); | |||||
| }); | |||||
| </script> | |||||
| <style> | |||||
| html { | |||||
| background-color: #F0F8FF; | |||||
| } | |||||
| body { | |||||
| font-family: Helvetica, Arial, sans-serif; | |||||
| font-size: 10pt; | |||||
| } | |||||
| #header { | |||||
| width: 100%; | |||||
| font-size: 20pt; | |||||
| } | |||||
| #header2 { | |||||
| margin-left:1200px; | |||||
| } | |||||
| #logo { | |||||
| width: 115px; | |||||
| margin: 0px 0px 0px 0px; | |||||
| border-style: none; | |||||
| } | |||||
| a { | |||||
| color: #305CB3; | |||||
| } | |||||
| a.hidden { | |||||
| display:none; | |||||
| } | |||||
| em { | |||||
| color: #FF833D; | |||||
| } | |||||
| .error { | |||||
| color: white; | |||||
| background-color: red; | |||||
| left: 210px; | |||||
| width:80%; | |||||
| position: relative; | |||||
| } | |||||
| .debug { display: none; font-size: 10pt} | |||||
| #debug_query { | |||||
| font-family: Helvetica, Arial, sans-serif; | |||||
| font-size: 10pt; | |||||
| font-weight: bold; | |||||
| } | |||||
| #parsed_query { | |||||
| font-family: Courier, Courier New, monospace; | |||||
| font-size: 10pt; | |||||
| font-weight: normal; | |||||
| } | |||||
| #admin { | |||||
| text-align: right; | |||||
| vertical-align: top; | |||||
| } | |||||
| #query-form { | |||||
| width: 90%; | |||||
| } | |||||
| #query-box { | |||||
| padding: 5px; | |||||
| margin: 5px; | |||||
| font-weight: normal; | |||||
| font-size: 24px; | |||||
| letter-spacing: 0.08em; | |||||
| } | |||||
| #constraints { | |||||
| margin: 10px; | |||||
| } | |||||
| #tabs { } | |||||
| #tabs li { display: inline; font-size: 10px;} | |||||
| #tabs li a { border-radius: 20px; border: 2px solid #C1CDCD; padding: 10px;color: #42454a; background-color: #dedbde;} | |||||
| #tabs li a:hover { background-color: #f1f0ee; } | |||||
| #tabs li a.selected { color: #000; background-color: #f1f0ee; font-weight: bold; padding: 5px } | |||||
| #tabs li a.no_results { color: #000; background-color: #838B8B; font-style: italic; padding: 5px; pointer-events: none; | |||||
| cursor: default; text-decoration: none;} | |||||
| .pagination { | |||||
| width: 305px; | |||||
| border-radius: 25px; | |||||
| border: 2px solid #C1CDCD; | |||||
| padding: 20px; | |||||
| padding-left: 10%; | |||||
| background: #eee; | |||||
| margin-left: 190px; | |||||
| margin-top : 42px; | |||||
| padding-top: 5px; | |||||
| padding-bottom: 5px; | |||||
| text-align:left; | |||||
| } | |||||
| #results_list { width: 70%; } | |||||
| .result-document { | |||||
| border-radius: 25px; | |||||
| border: 2px solid #C1CDCD; | |||||
| padding: 10px; | |||||
| /* width: 800px; */ | |||||
| /* height: 120px; */ | |||||
| margin: 5px; | |||||
| /* margin-left: 60px; */ | |||||
| /* margin-right: 210px; */ | |||||
| /* margin-bottom: 15px; */ | |||||
| transition: 1s ease; | |||||
| } | |||||
| .result-document:hover | |||||
| { | |||||
| -webkit-transform: scale(1.1); | |||||
| -ms-transform: scale(1.1); | |||||
| transform: scale(1.1); | |||||
| transition: 1s ease; | |||||
| } | |||||
| .result-document div { | |||||
| padding: 5px; | |||||
| } | |||||
| .result-title { | |||||
| width:60%; | |||||
| } | |||||
| .result-body { | |||||
| background: #ddd; | |||||
| } | |||||
| .result-document:nth-child(2n+1) { | |||||
| background-color: #FFFFFD; | |||||
| } | |||||
| #facets { | |||||
| margin: 5px; | |||||
| margin-top: 0px; | |||||
| padding: 5px; | |||||
| top: -20px; | |||||
| position: relative; | |||||
| float: right; | |||||
| width: 25%; | |||||
| } | |||||
| .facet-field { | |||||
| font-weight: bold; | |||||
| } | |||||
| #facets ul { | |||||
| list-style: none; | |||||
| margin: 0; | |||||
| margin-bottom: 5px; | |||||
| margin-top: 5px; | |||||
| padding-left: 10px; | |||||
| } | |||||
| #facets ul li { | |||||
| color: #999; | |||||
| padding: 2px; | |||||
| } | |||||
| div.facet_field { | |||||
| clear: left; | |||||
| } | |||||
| ul.tx3-tag-cloud { } | |||||
| ul.tx3-tag-cloud li { | |||||
| display: block; | |||||
| float: left; | |||||
| list-style: none; | |||||
| margin-right: 4px; | |||||
| } | |||||
| ul.tx3-tag-cloud li a { | |||||
| display: block; | |||||
| text-decoration: none; | |||||
| color: #c9c9c9; | |||||
| padding: 3px 10px; | |||||
| } | |||||
| ul.tx3-tag-cloud li a:hover { | |||||
| color: #000000; | |||||
| -webkit-transition: color 250ms linear; | |||||
| -moz-transition: color 250ms linear; | |||||
| -o-transition: color 250ms linear; | |||||
| -ms-transition: color 250ms linear; | |||||
| transition: color 250ms linear; | |||||
| } | |||||
| .dropit { | |||||
| list-style: none; | |||||
| padding: 0; | |||||
| margin: 0; | |||||
| } | |||||
| .dropit .dropit-trigger { position: relative; } | |||||
| .dropit .dropit-submenu { | |||||
| position: absolute; | |||||
| top: 100%; | |||||
| left: 0; /* dropdown left or right */ | |||||
| z-index: 1000; | |||||
| display: none; | |||||
| min-width: 150px; | |||||
| list-style: none; | |||||
| padding: 0; | |||||
| margin: 0; | |||||
| } | |||||
| .dropit .dropit-open .dropit-submenu { display: block; } | |||||
| /* autocomplete css */ | |||||
| .ac_results { | |||||
| padding: 0px; | |||||
| border: 1px solid black; | |||||
| background-color: white; | |||||
| overflow: hidden; | |||||
| z-index: 99999; | |||||
| } | |||||
| .ac_results ul { | |||||
| width: 100%; | |||||
| list-style-position: outside; | |||||
| list-style: none; | |||||
| padding: 0; | |||||
| margin: 0; | |||||
| } | |||||
| .ac_results li { | |||||
| margin: 0px; | |||||
| padding: 2px 5px; | |||||
| cursor: default; | |||||
| display: block; | |||||
| font: menu; | |||||
| font-size: 12px; | |||||
| line-height: 16px; | |||||
| overflow: hidden; | |||||
| } | |||||
| .ac_loading { | |||||
| /* background: white url('˜indicator.gif') right center no-repeat; */ | |||||
| } | |||||
| .ac_odd { | |||||
| background-color: #eee; | |||||
| } | |||||
| .ac_over { | |||||
| background-color: #0A246A; | |||||
| color: white; | |||||
| } | |||||
| </style> |
| ## Renders a single search hit: title with file-type icon, escaped id, | |||||
| ## highlighted content snippets, and two toggleable debug panes. | |||||
| ## $docId is the value of the schema's uniqueKey field for this document. | |||||
| #set($docId = $doc.getFirstValue($request.schema.uniqueKeyField.name)) | |||||
| ## Load Mime-Type List and Mapping | |||||
| #parse('mime_type_lists.vm') | |||||
| ## Title: use the 'title' field when present, otherwise the part of the id | |||||
| ## after the last '/'. Both variants are HTML-escaped before being printed. | |||||
| #if($doc.getFieldValue('title')) | |||||
| #set($title = $esc.html($doc.getFirstValue('title'))) | |||||
| #else | |||||
| #set($title = $esc.html($doc.getFirstValue('id').substring($math.add(1,$doc.getFirstValue('id').lastIndexOf('/'))))) | |||||
| #end | |||||
| ## Date | |||||
| #if($doc.getFieldValue('attr_meta_creation_date')) | |||||
| #set($date = $esc.html($doc.getFirstValue('attr_meta_creation_date'))) | |||||
| #else | |||||
| #set($date = "No date found") | |||||
| #end | |||||
| ## URL: explicit 'url' field, else a file:// link to 'resourcename', else the id | |||||
| #if($doc.getFieldValue('url')) | |||||
| #set($url = $doc.getFieldValue('url')) | |||||
| #elseif($doc.getFieldValue('resourcename')) | |||||
| #set($url = "file:///$doc.getFirstValue('resourcename')") | |||||
| #else | |||||
| #set($url = "$doc.getFieldValue('id')") | |||||
| #end | |||||
| ## Sort out Mime-Type: map the content type (without parameters) to a file | |||||
| ## extension, fall back to the extension of the resource name, then to "file". | |||||
| #set($ct = $doc.getFirstValue('content_type').split(";").get(0)) | |||||
| #set($filename = $doc.getFirstValue('resourcename')) | |||||
| ## Reset first so a failed lookup below cannot leave a value from a previous hit | |||||
| #set($filetype = false) | |||||
| #set($filetype = $mimeExtensionsMap.get($ct)) | |||||
| #if(!$filetype) | |||||
| #set($filetype = $filename.substring($filename.lastIndexOf(".")).substring(1)) | |||||
| #end | |||||
| #if(!$filetype) | |||||
| #set($filetype = "file") | |||||
| #end | |||||
| #if(!$supportedMimeTypes.contains($filetype)) | |||||
| #set($filetype = "file") | |||||
| #end | |||||
| <div class="result-document"> | |||||
| <span class="result-title"> | |||||
| <img src="#{url_root}/img/filetypes/${filetype}.png" align="center"> | |||||
| <b>$title</b> | |||||
| </span> | |||||
| <div> | |||||
| id: $esc.html($docId) <br/> | |||||
| </div> | |||||
| ## Highlight snippets: escape first, then turn the HL_START/HL_END markers into <em> | |||||
| #set($pad = "") | |||||
| #foreach($v in $response.response.highlighting.get($docId).get("content")) | |||||
| $pad$esc.html($v).replace("HL_START","<em>").replace("HL_END","</em>") | |||||
| #set($pad = " ... ") | |||||
| #end | |||||
| </div> | |||||
| <a href="#" class="debug" onclick='jQuery(this).next().toggle(); return false;'>toggle explain</a> | |||||
| <pre style="display: none;"> | |||||
| $esc.html($response.getExplainMap().get($docId)) | |||||
| </pre> | |||||
| <a href="#" class="debug" onclick='jQuery(this).next().toggle(); return false;'>show all fields</a> | |||||
| <pre style="display:none;"> | |||||
| #foreach($fieldname in $doc.fieldNames) | |||||
| <span>$fieldname :</span> | |||||
| <span>#foreach($value in $doc.getFieldValues($fieldname))$esc.html($value)#end</span> | |||||
| #end | |||||
| </pre> | |||||
| /* | |||||
| * Dropit v1.1.0 | |||||
| * http://dev7studios.com/dropit | |||||
| * | |||||
| * Copyright 2012, Dev7studios | |||||
| * Free to use and abuse under the MIT license. | |||||
| * http://www.opensource.org/licenses/mit-license.php | |||||
| */ | |||||
| // jQuery plugin "dropit": turns a list into a dropdown menu. | |||||
| // Usage: $(selector).dropit(options) -- see $.fn.dropit.defaults for the options. | |||||
| ;(function($) { | |||||
| // Entry point. `method` is either the name of an entry in `methods`, | |||||
| // an options object, or omitted (both of the latter run `init`). | |||||
| $.fn.dropit = function(method) { | |||||
| var methods = { | |||||
| // Merge options over the defaults and wire up every matched element. | |||||
| init : function(options) { | |||||
| // NOTE(review): `this.dropit` appears to resolve to the shared $.fn.dropit | |||||
| // function, so the merged settings are stored on $.fn.dropit.settings | |||||
| // (read back below) and are shared by all menus -- confirm. | |||||
| this.dropit.settings = $.extend({}, this.dropit.defaults, options); | |||||
| return this.each(function() { | |||||
| var $el = $(this), | |||||
| el = this, | |||||
| settings = $.fn.dropit.settings; | |||||
| // Hide initial submenus | |||||
| $el.addClass('dropit') | |||||
| .find('>'+ settings.triggerParentEl +':has('+ settings.submenuEl +')').addClass('dropit-trigger') | |||||
| .find(settings.submenuEl).addClass('dropit-submenu').hide(); | |||||
| // Open on the configured action (delegated to the trigger elements); | |||||
| // any previously bound handler for that action is removed first. | |||||
| $el.off(settings.action).on(settings.action, settings.triggerParentEl +':has('+ settings.submenuEl +') > '+ settings.triggerEl +'', function(){ | |||||
| // Close click menus if clicked again | |||||
| if(settings.action == 'click' && $(this).parents(settings.triggerParentEl).hasClass('dropit-open')){ | |||||
| settings.beforeHide.call(this); | |||||
| $(this).parents(settings.triggerParentEl).removeClass('dropit-open').find(settings.submenuEl).hide(); | |||||
| settings.afterHide.call(this); | |||||
| return false; | |||||
| } | |||||
| // Hide open menus | |||||
| settings.beforeHide.call(this); | |||||
| $('.dropit-open').removeClass('dropit-open').find('.dropit-submenu').hide(); | |||||
| settings.afterHide.call(this); | |||||
| // Open this menu | |||||
| settings.beforeShow.call(this); | |||||
| $(this).parents(settings.triggerParentEl).addClass('dropit-open').find(settings.submenuEl).show(); | |||||
| settings.afterShow.call(this); | |||||
| // Returning false stops the event here, so the document click | |||||
| // handler below does not immediately close the menu again. | |||||
| return false; | |||||
| }); | |||||
| // Close if outside click | |||||
| $(document).on('click', function(){ | |||||
| settings.beforeHide.call(this); | |||||
| $('.dropit-open').removeClass('dropit-open').find('.dropit-submenu').hide(); | |||||
| settings.afterHide.call(this); | |||||
| }); | |||||
| // If hover: also close the open menu when the pointer leaves it | |||||
| if(settings.action == 'mouseenter'){ | |||||
| $el.on('mouseleave', '.dropit-open', function(){ | |||||
| settings.beforeHide.call(this); | |||||
| $(this).removeClass('dropit-open').find(settings.submenuEl).hide(); | |||||
| settings.afterHide.call(this); | |||||
| }); | |||||
| } | |||||
| settings.afterLoad.call(this); | |||||
| }); | |||||
| } | |||||
| }; | |||||
| // Dispatch: named method, or init for an options object / no argument; | |||||
| // anything else is reported through $.error. | |||||
| if (methods[method]) { | |||||
| return methods[method].apply(this, Array.prototype.slice.call(arguments, 1)); | |||||
| } else if (typeof method === 'object' || !method) { | |||||
| return methods.init.apply(this, arguments); | |||||
| } else { | |||||
| $.error( 'Method "' + method + '" does not exist in dropit plugin!'); | |||||
| } | |||||
| }; | |||||
| // Default options; every callback is invoked with `this` set by the caller above. | |||||
| $.fn.dropit.defaults = { | |||||
| action: 'mouseenter', // The open action for the trigger | |||||
| submenuEl: 'ul', // The submenu element | |||||
| triggerEl: 'a', // The trigger element | |||||
| triggerParentEl: 'li', // The trigger parent element | |||||
| afterLoad: function(){}, // Triggers when plugin has loaded | |||||
| beforeShow: function(){}, // Triggers before submenu is shown | |||||
| afterShow: function(){}, // Triggers after submenu is shown | |||||
| beforeHide: function(){}, // Triggers before submenu is hidden | |||||
| afterHide: function(){} // Triggers after submenu is hidden | |||||
| }; | |||||
| // Effective settings, filled in by init. | |||||
| $.fn.dropit.settings = {}; | |||||
| })(jQuery); |
| /* | |||||
| * Autocomplete - jQuery plugin 1.1pre | |||||
| * | |||||
| * Copyright (c) 2007 Dylan Verheul, Dan G. Switzer, Anjesh Tuladhar, Jörn Zaefferer | |||||
| * | |||||
| * Dual licensed under the MIT and GPL licenses: | |||||
| * http://www.opensource.org/licenses/mit-license.php | |||||
| * http://www.gnu.org/licenses/gpl.html | |||||
| * | |||||
| * Revision: Id: jquery.autocomplete.js 5785 2008-07-12 10:37:33Z joern.zaefferer $ | |||||
| * | |||||
| */ | |||||
| ;(function($) { | |||||
| $.fn.extend({ | |||||
| autocomplete: function(urlOrData, options) { | |||||
| var isUrl = typeof urlOrData == "string"; | |||||
| options = $.extend({}, $.Autocompleter.defaults, { | |||||
| url: isUrl ? urlOrData : null, | |||||
| data: isUrl ? null : urlOrData, | |||||
| delay: isUrl ? $.Autocompleter.defaults.delay : 10, | |||||
| max: options && !options.scroll ? 10 : 150 | |||||
| }, options); | |||||
| // if highlight is set to false, replace it with a do-nothing function | |||||
| options.highlight = options.highlight || function(value) { return value; }; | |||||
| // if the formatMatch option is not specified, then use formatItem for backwards compatibility | |||||
| options.formatMatch = options.formatMatch || options.formatItem; | |||||
| return this.each(function() { | |||||
| new $.Autocompleter(this, options); | |||||
| }); | |||||
| }, | |||||
| result: function(handler) { | |||||
| return this.bind("result", handler); | |||||
| }, | |||||
| search: function(handler) { | |||||
| return this.trigger("search", [handler]); | |||||
| }, | |||||
| flushCache: function() { | |||||
| return this.trigger("flushCache"); | |||||
| }, | |||||
| setOptions: function(options){ | |||||
| return this.trigger("setOptions", [options]); | |||||
| }, | |||||
| unautocomplete: function() { | |||||
| return this.trigger("unautocomplete"); | |||||
| } | |||||
| }); | |||||
| $.Autocompleter = function(input, options) { | |||||
| var KEY = { | |||||
| UP: 38, | |||||
| DOWN: 40, | |||||
| DEL: 46, | |||||
| TAB: 9, | |||||
| RETURN: 13, | |||||
| ESC: 27, | |||||
| COMMA: 188, | |||||
| PAGEUP: 33, | |||||
| PAGEDOWN: 34, | |||||
| BACKSPACE: 8 | |||||
| }; | |||||
| // Create $ object for input element | |||||
| var $input = $(input).attr("autocomplete", "off").addClass(options.inputClass); | |||||
| var timeout; | |||||
| var previousValue = ""; | |||||
| var cache = $.Autocompleter.Cache(options); | |||||
| var hasFocus = 0; | |||||
| var lastKeyPressCode; | |||||
| var config = { | |||||
| mouseDownOnSelect: false | |||||
| }; | |||||
| var select = $.Autocompleter.Select(options, input, selectCurrent, config); | |||||
| var blockSubmit; | |||||
| // prevent form submit in opera when selecting with return key | |||||
| $.browser.opera && $(input.form).bind("submit.autocomplete", function() { | |||||
| if (blockSubmit) { | |||||
| blockSubmit = false; | |||||
| return false; | |||||
| } | |||||
| }); | |||||
| // only opera doesn't trigger keydown multiple times while pressed, others don't work with keypress at all | |||||
| $input.bind(($.browser.opera ? "keypress" : "keydown") + ".autocomplete", function(event) { | |||||
| // track last key pressed | |||||
| lastKeyPressCode = event.keyCode; | |||||
| switch(event.keyCode) { | |||||
| case KEY.UP: | |||||
| event.preventDefault(); | |||||
| if ( select.visible() ) { | |||||
| select.prev(); | |||||
| } else { | |||||
| onChange(0, true); | |||||
| } | |||||
| break; | |||||
| case KEY.DOWN: | |||||
| event.preventDefault(); | |||||
| if ( select.visible() ) { | |||||
| select.next(); | |||||
| } else { | |||||
| onChange(0, true); | |||||
| } | |||||
| break; | |||||
| case KEY.PAGEUP: | |||||
| event.preventDefault(); | |||||
| if ( select.visible() ) { | |||||
| select.pageUp(); | |||||
| } else { | |||||
| onChange(0, true); | |||||
| } | |||||
| break; | |||||
| case KEY.PAGEDOWN: | |||||
| event.preventDefault(); | |||||
| if ( select.visible() ) { | |||||
| select.pageDown(); | |||||
| } else { | |||||
| onChange(0, true); | |||||
| } | |||||
| break; | |||||
| // matches also semicolon | |||||
| case options.multiple && $.trim(options.multipleSeparator) == "," && KEY.COMMA: | |||||
| case KEY.TAB: | |||||
| case KEY.RETURN: | |||||
| if( selectCurrent() ) { | |||||
| // stop default to prevent a form submit, Opera needs special handling | |||||
| event.preventDefault(); | |||||
| blockSubmit = true; | |||||
| return false; | |||||
| } | |||||
| break; | |||||
| case KEY.ESC: | |||||
| select.hide(); | |||||
| break; | |||||
| default: | |||||
| clearTimeout(timeout); | |||||
| timeout = setTimeout(onChange, options.delay); | |||||
| break; | |||||
| } | |||||
| }).focus(function(){ | |||||
| // track whether the field has focus, we shouldn't process any | |||||
| // results if the field no longer has focus | |||||
| hasFocus++; | |||||
| }).blur(function() { | |||||
| hasFocus = 0; | |||||
| if (!config.mouseDownOnSelect) { | |||||
| hideResults(); | |||||
| } | |||||
| }).click(function() { | |||||
| // show select when clicking in a focused field | |||||
| if ( hasFocus++ > 1 && !select.visible() ) { | |||||
| onChange(0, true); | |||||
| } | |||||
| }).bind("search", function() { | |||||
| // TODO why not just specifying both arguments? | |||||
| var fn = (arguments.length > 1) ? arguments[1] : null; | |||||
| function findValueCallback(q, data) { | |||||
| var result; | |||||
| if( data && data.length ) { | |||||
| for (var i=0; i < data.length; i++) { | |||||
| if( data[i].result.toLowerCase() == q.toLowerCase() ) { | |||||
| result = data[i]; | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| if( typeof fn == "function" ) fn(result); | |||||
| else $input.trigger("result", result && [result.data, result.value]); | |||||
| } | |||||
| $.each(trimWords($input.val()), function(i, value) { | |||||
| request(value, findValueCallback, findValueCallback); | |||||
| }); | |||||
| }).bind("flushCache", function() { | |||||
| cache.flush(); | |||||
| }).bind("setOptions", function() { | |||||
| $.extend(options, arguments[1]); | |||||
| // if we've updated the data, repopulate | |||||
| if ( "data" in arguments[1] ) | |||||
| cache.populate(); | |||||
| }).bind("unautocomplete", function() { | |||||
| select.unbind(); | |||||
| $input.unbind(); | |||||
| $(input.form).unbind(".autocomplete"); | |||||
| }); | |||||
| function selectCurrent() { | |||||
| var selected = select.selected(); | |||||
| if( !selected ) | |||||
| return false; | |||||
| var v = selected.result; | |||||
| previousValue = v; | |||||
| if ( options.multiple ) { | |||||
| var words = trimWords($input.val()); | |||||
| if ( words.length > 1 ) { | |||||
| v = words.slice(0, words.length - 1).join( options.multipleSeparator ) + options.multipleSeparator + v; | |||||
| } | |||||
| v += options.multipleSeparator; | |||||
| } | |||||
| $input.val(v); | |||||
| hideResultsNow(); | |||||
| $input.trigger("result", [selected.data, selected.value]); | |||||
| return true; | |||||
| } | |||||
| function onChange(crap, skipPrevCheck) { | |||||
| if( lastKeyPressCode == KEY.DEL ) { | |||||
| select.hide(); | |||||
| return; | |||||
| } | |||||
| var currentValue = $input.val(); | |||||
| if ( !skipPrevCheck && currentValue == previousValue ) | |||||
| return; | |||||
| previousValue = currentValue; | |||||
| currentValue = lastWord(currentValue); | |||||
| if ( currentValue.length >= options.minChars) { | |||||
| $input.addClass(options.loadingClass); | |||||
| if (!options.matchCase) | |||||
| currentValue = currentValue.toLowerCase(); | |||||
| request(currentValue, receiveData, hideResultsNow); | |||||
| } else { | |||||
| stopLoading(); | |||||
| select.hide(); | |||||
| } | |||||
| }; | |||||
| function trimWords(value) { | |||||
| if ( !value ) { | |||||
| return [""]; | |||||
| } | |||||
| var words = value.split( options.multipleSeparator ); | |||||
| var result = []; | |||||
| $.each(words, function(i, value) { | |||||
| if ( $.trim(value) ) | |||||
| result[i] = $.trim(value); | |||||
| }); | |||||
| return result; | |||||
| } | |||||
| function lastWord(value) { | |||||
| if ( !options.multiple ) | |||||
| return value; | |||||
| var words = trimWords(value); | |||||
| return words[words.length - 1]; | |||||
| } | |||||
| // fills in the input box w/the first match (assumed to be the best match) | |||||
| // q: the term entered | |||||
| // sValue: the first matching result | |||||
| function autoFill(q, sValue){ | |||||
| // autofill in the complete box w/the first match as long as the user hasn't entered in more data | |||||
| // if the last user key pressed was backspace, don't autofill | |||||
| if( options.autoFill && (lastWord($input.val()).toLowerCase() == q.toLowerCase()) && lastKeyPressCode != KEY.BACKSPACE ) { | |||||
| // fill in the value (keep the case the user has typed) | |||||
| $input.val($input.val() + sValue.substring(lastWord(previousValue).length)); | |||||
| // select the portion of the value not typed by the user (so the next character will erase) | |||||
| $.Autocompleter.Selection(input, previousValue.length, previousValue.length + sValue.length); | |||||
| } | |||||
| }; | |||||
| function hideResults() { | |||||
| clearTimeout(timeout); | |||||
| timeout = setTimeout(hideResultsNow, 200); | |||||
| }; | |||||
| function hideResultsNow() { | |||||
| var wasVisible = select.visible(); | |||||
| select.hide(); | |||||
| clearTimeout(timeout); | |||||
| stopLoading(); | |||||
| if (options.mustMatch) { | |||||
| // call search and run callback | |||||
| $input.search( | |||||
| function (result){ | |||||
| // if no value found, clear the input box | |||||
| if( !result ) { | |||||
| if (options.multiple) { | |||||
| var words = trimWords($input.val()).slice(0, -1); | |||||
| $input.val( words.join(options.multipleSeparator) + (words.length ? options.multipleSeparator : "") ); | |||||
| } | |||||
| else | |||||
| $input.val( "" ); | |||||
| } | |||||
| } | |||||
| ); | |||||
| } | |||||
| if (wasVisible) | |||||
| // position cursor at end of input field | |||||
| $.Autocompleter.Selection(input, input.value.length, input.value.length); | |||||
| }; | |||||
| function receiveData(q, data) { | |||||
| if ( data && data.length && hasFocus ) { | |||||
| stopLoading(); | |||||
| select.display(data, q); | |||||
| autoFill(q, data[0].value); | |||||
| select.show(); | |||||
| } else { | |||||
| hideResultsNow(); | |||||
| } | |||||
| }; | |||||
| function request(term, success, failure) { | |||||
| if (!options.matchCase) | |||||
| term = term.toLowerCase(); | |||||
| var data = cache.load(term); | |||||
| data = null; // Avoid buggy cache and go to Solr every time | |||||
| // recieve the cached data | |||||
| if (data && data.length) { | |||||
| success(term, data); | |||||
| // if an AJAX url has been supplied, try loading the data now | |||||
| } else if( (typeof options.url == "string") && (options.url.length > 0) ){ | |||||
| var extraParams = { | |||||
| timestamp: +new Date() | |||||
| }; | |||||
| $.each(options.extraParams, function(key, param) { | |||||
| extraParams[key] = typeof param == "function" ? param() : param; | |||||
| }); | |||||
| $.ajax({ | |||||
| // try to leverage ajaxQueue plugin to abort previous requests | |||||
| mode: "abort", | |||||
| // limit abortion to this input | |||||
| port: "autocomplete" + input.name, | |||||
| dataType: options.dataType, | |||||
| url: options.url, | |||||
| data: $.extend({ | |||||
| q: lastWord(term), | |||||
| limit: options.max | |||||
| }, extraParams), | |||||
| success: function(data) { | |||||
| var parsed = options.parse && options.parse(data) || parse(data); | |||||
| cache.add(term, parsed); | |||||
| success(term, parsed); | |||||
| } | |||||
| }); | |||||
| } else { | |||||
| // if we have a failure, we need to empty the list -- this prevents the the [TAB] key from selecting the last successful match | |||||
| select.emptyList(); | |||||
| failure(term); | |||||
| } | |||||
| }; | |||||
| function parse(data) { | |||||
| var parsed = []; | |||||
| var rows = data.split("\n"); | |||||
| for (var i=0; i < rows.length; i++) { | |||||
| var row = $.trim(rows[i]); | |||||
| if (row) { | |||||
| row = row.split("|"); | |||||
| parsed[parsed.length] = { | |||||
| data: row, | |||||
| value: row[0], | |||||
| result: options.formatResult && options.formatResult(row, row[0]) || row[0] | |||||
| }; | |||||
| } | |||||
| } | |||||
| return parsed; | |||||
| }; | |||||
| function stopLoading() { | |||||
| $input.removeClass(options.loadingClass); | |||||
| }; | |||||
| }; | |||||
| // Default options; merged under the caller's options in $.fn.autocomplete. | |||||
| $.Autocompleter.defaults = { | |||||
| // CSS classes added to the input, the results container, and the input while a lookup is pending | |||||
| inputClass: "ac_input", | |||||
| resultsClass: "ac_results", | |||||
| loadingClass: "ac_loading", | |||||
| // minimum length of the typed word before a lookup is started | |||||
| minChars: 1, | |||||
| // delay in ms between a key press and the lookup | |||||
| delay: 400, | |||||
| // when false, terms are lower-cased before lookup and cache matching | |||||
| matchCase: false, | |||||
| // cache matching: reuse results of a shorter prefix / match anywhere (true) or at word starts ("word") | |||||
| matchSubset: true, | |||||
| matchContains: false, | |||||
| // number of cache entries after which the cache is flushed; a falsy value disables cache loads | |||||
| cacheLength: 10, | |||||
| // upper bound on displayed items; also sent to the server as "limit" | |||||
| max: 100, | |||||
| // when true, input that matches no entry is cleared when the list is hidden | |||||
| mustMatch: false, | |||||
| // extra request parameters; function values are called to obtain the value | |||||
| extraParams: {}, | |||||
| // when true, the first list item is pre-selected | |||||
| selectFirst: false, | |||||
| // formatItem renders a row for display; formatMatch (defaults to formatItem) yields the text used for cache matching | |||||
| formatItem: function(row) { return row[0]; }, | |||||
| formatMatch: null, | |||||
| // when true, the input is completed with the first match and the added part selected | |||||
| autoFill: false, | |||||
| // width of the results container; 0 means "use the input's width" | |||||
| width: 0, | |||||
| // multiple values in one input, separated by multipleSeparator | |||||
| multiple: false, | |||||
| multipleSeparator: ", ", | |||||
| // wraps occurrences of the (regex-escaped) term in <strong>; the lookarounds skip matches inside tags and entities | |||||
| highlight: function(value, term) { | |||||
| return value.replace(new RegExp("(?![^&;]+;)(?!<[^<>]*)(" + term.replace(/([\^\$\(\)\[\]\{\}\*\.\+\?\|\\])/gi, "\\$1") + ")(?![^<>]*>)(?![^&;]+;)", "gi"), "<strong>$1</strong>"); | |||||
| }, | |||||
| // make the list scrollable, limited to scrollHeight (used as max-height) | |||||
| scroll: true, | |||||
| scrollHeight: 180 | |||||
| }; | |||||
| $.Autocompleter.Cache = function(options) { | |||||
| var data = {}; | |||||
| var length = 0; | |||||
| function matchSubset(s, sub) { | |||||
| if (!options.matchCase) | |||||
| s = s.toLowerCase(); | |||||
| var i = s.indexOf(sub); | |||||
| if (options.matchContains == "word"){ | |||||
| i = s.toLowerCase().search("\\b" + sub.toLowerCase()); | |||||
| } | |||||
| if (i == -1) return false; | |||||
| return i == 0 || options.matchContains; | |||||
| }; | |||||
| function add(q, value) { | |||||
| if (length > options.cacheLength){ | |||||
| flush(); | |||||
| } | |||||
| if (!data[q]){ | |||||
| length++; | |||||
| } | |||||
| data[q] = value; | |||||
| } | |||||
| function populate(){ | |||||
| if( !options.data ) return false; | |||||
| // track the matches | |||||
| var stMatchSets = {}, | |||||
| nullData = 0; | |||||
| // no url was specified, we need to adjust the cache length to make sure it fits the local data store | |||||
| if( !options.url ) options.cacheLength = 1; | |||||
| // track all options for minChars = 0 | |||||
| stMatchSets[""] = []; | |||||
| // loop through the array and create a lookup structure | |||||
| for ( var i = 0, ol = options.data.length; i < ol; i++ ) { | |||||
| var rawValue = options.data[i]; | |||||
| // if rawValue is a string, make an array otherwise just reference the array | |||||
| rawValue = (typeof rawValue == "string") ? [rawValue] : rawValue; | |||||
| var value = options.formatMatch(rawValue, i+1, options.data.length); | |||||
| if ( value === false ) | |||||
| continue; | |||||
| var firstChar = value.charAt(0).toLowerCase(); | |||||
| // if no lookup array for this character exists, look it up now | |||||
| if( !stMatchSets[firstChar] ) | |||||
| stMatchSets[firstChar] = []; | |||||
| // if the match is a string | |||||
| var row = { | |||||
| value: value, | |||||
| data: rawValue, | |||||
| result: options.formatResult && options.formatResult(rawValue) || value | |||||
| }; | |||||
| // push the current match into the set list | |||||
| stMatchSets[firstChar].push(row); | |||||
| // keep track of minChars zero items | |||||
| if ( nullData++ < options.max ) { | |||||
| stMatchSets[""].push(row); | |||||
| } | |||||
| }; | |||||
| // add the data items to the cache | |||||
| $.each(stMatchSets, function(i, value) { | |||||
| // increase the cache size | |||||
| options.cacheLength++; | |||||
| // add to the cache | |||||
| add(i, value); | |||||
| }); | |||||
| } | |||||
| // populate any existing data | |||||
| setTimeout(populate, 25); | |||||
| function flush(){ | |||||
| data = {}; | |||||
| length = 0; | |||||
| } | |||||
| return { | |||||
| flush: flush, | |||||
| add: add, | |||||
| populate: populate, | |||||
| load: function(q) { | |||||
| if (!options.cacheLength || !length) | |||||
| return null; | |||||
| /* | |||||
| * if dealing w/local data and matchContains than we must make sure | |||||
| * to loop through all the data collections looking for matches | |||||
| */ | |||||
| if( !options.url && options.matchContains ){ | |||||
| // track all matches | |||||
| var csub = []; | |||||
| // loop through all the data grids for matches | |||||
| for( var k in data ){ | |||||
| // don't search through the stMatchSets[""] (minChars: 0) cache | |||||
| // this prevents duplicates | |||||
| if( k.length > 0 ){ | |||||
| var c = data[k]; | |||||
| $.each(c, function(i, x) { | |||||
| // if we've got a match, add it to the array | |||||
| if (matchSubset(x.value, q)) { | |||||
| csub.push(x); | |||||
| } | |||||
| }); | |||||
| } | |||||
| } | |||||
| return csub; | |||||
| } else | |||||
| // if the exact item exists, use it | |||||
| if (data[q]){ | |||||
| return data[q]; | |||||
| } else | |||||
| if (options.matchSubset) { | |||||
| for (var i = q.length - 1; i >= options.minChars; i--) { | |||||
| var c = data[q.substr(0, i)]; | |||||
| if (c) { | |||||
| var csub = []; | |||||
| $.each(c, function(i, x) { | |||||
| if (matchSubset(x.value, q)) { | |||||
| csub[csub.length] = x; | |||||
| } | |||||
| }); | |||||
| return csub; | |||||
| } | |||||
| } | |||||
| } | |||||
| return null; | |||||
| } | |||||
| }; | |||||
| }; | |||||
| $.Autocompleter.Select = function (options, input, select, config) { | |||||
| var CLASSES = { | |||||
| ACTIVE: "ac_over" | |||||
| }; | |||||
| var listItems, | |||||
| active = -1, | |||||
| data, | |||||
| term = "", | |||||
| needsInit = true, | |||||
| element, | |||||
| list; | |||||
| // Create results | |||||
| function init() { | |||||
| if (!needsInit) | |||||
| return; | |||||
| element = $("<div/>") | |||||
| .hide() | |||||
| .addClass(options.resultsClass) | |||||
| .css("position", "absolute") | |||||
| .appendTo(document.body); | |||||
| list = $("<ul/>").appendTo(element).mouseover( function(event) { | |||||
| if(target(event).nodeName && target(event).nodeName.toUpperCase() == 'LI') { | |||||
| active = $("li", list).removeClass(CLASSES.ACTIVE).index(target(event)); | |||||
| $(target(event)).addClass(CLASSES.ACTIVE); | |||||
| } | |||||
| }).click(function(event) { | |||||
| $(target(event)).addClass(CLASSES.ACTIVE); | |||||
| select(); | |||||
| // TODO provide option to avoid setting focus again after selection? useful for cleanup-on-focus | |||||
| input.focus(); | |||||
| return false; | |||||
| }).mousedown(function() { | |||||
| config.mouseDownOnSelect = true; | |||||
| }).mouseup(function() { | |||||
| config.mouseDownOnSelect = false; | |||||
| }); | |||||
| if( options.width > 0 ) | |||||
| element.css("width", options.width); | |||||
| needsInit = false; | |||||
| } | |||||
| function target(event) { | |||||
| var element = event.target; | |||||
| while(element && element.tagName != "LI") | |||||
| element = element.parentNode; | |||||
| // more fun with IE, sometimes event.target is empty, just ignore it then | |||||
| if(!element) | |||||
| return []; | |||||
| return element; | |||||
| } | |||||
| function moveSelect(step) { | |||||
| listItems.slice(active, active + 1).removeClass(CLASSES.ACTIVE); | |||||
| movePosition(step); | |||||
| var activeItem = listItems.slice(active, active + 1).addClass(CLASSES.ACTIVE); | |||||
| if(options.scroll) { | |||||
| var offset = 0; | |||||
| listItems.slice(0, active).each(function() { | |||||
| offset += this.offsetHeight; | |||||
| }); | |||||
| if((offset + activeItem[0].offsetHeight - list.scrollTop()) > list[0].clientHeight) { | |||||
| list.scrollTop(offset + activeItem[0].offsetHeight - list.innerHeight()); | |||||
| } else if(offset < list.scrollTop()) { | |||||
| list.scrollTop(offset); | |||||
| } | |||||
| } | |||||
| }; | |||||
| function movePosition(step) { | |||||
| active += step; | |||||
| if (active < 0) { | |||||
| active = listItems.size() - 1; | |||||
| } else if (active >= listItems.size()) { | |||||
| active = 0; | |||||
| } | |||||
| } | |||||
| function limitNumberOfItems(available) { | |||||
| return options.max && options.max < available | |||||
| ? options.max | |||||
| : available; | |||||
| } | |||||
| function fillList() { | |||||
| list.empty(); | |||||
| var max = limitNumberOfItems(data.length); | |||||
| for (var i=0; i < max; i++) { | |||||
| if (!data[i]) | |||||
| continue; | |||||
| var formatted = options.formatItem(data[i].data, i+1, max, data[i].value, term); | |||||
| if ( formatted === false ) | |||||
| continue; | |||||
| var li = $("<li/>").html( options.highlight(formatted, term) ).addClass(i%2 == 0 ? "ac_even" : "ac_odd").appendTo(list)[0]; | |||||
| $.data(li, "ac_data", data[i]); | |||||
| } | |||||
| listItems = list.find("li"); | |||||
| if ( options.selectFirst ) { | |||||
| listItems.slice(0, 1).addClass(CLASSES.ACTIVE); | |||||
| active = 0; | |||||
| } | |||||
| // apply bgiframe if available | |||||
| if ( $.fn.bgiframe ) | |||||
| list.bgiframe(); | |||||
| } | |||||
| return { | |||||
| display: function(d, q) { | |||||
| init(); | |||||
| data = d; | |||||
| term = q; | |||||
| fillList(); | |||||
| }, | |||||
| next: function() { | |||||
| moveSelect(1); | |||||
| }, | |||||
| prev: function() { | |||||
| moveSelect(-1); | |||||
| }, | |||||
| pageUp: function() { | |||||
| if (active != 0 && active - 8 < 0) { | |||||
| moveSelect( -active ); | |||||
| } else { | |||||
| moveSelect(-8); | |||||
| } | |||||
| }, | |||||
| pageDown: function() { | |||||
| if (active != listItems.size() - 1 && active + 8 > listItems.size()) { | |||||
| moveSelect( listItems.size() - 1 - active ); | |||||
| } else { | |||||
| moveSelect(8); | |||||
| } | |||||
| }, | |||||
| hide: function() { | |||||
| element && element.hide(); | |||||
| listItems && listItems.removeClass(CLASSES.ACTIVE); | |||||
| active = -1; | |||||
| }, | |||||
| visible : function() { | |||||
| return element && element.is(":visible"); | |||||
| }, | |||||
| current: function() { | |||||
| return this.visible() && (listItems.filter("." + CLASSES.ACTIVE)[0] || options.selectFirst && listItems[0]); | |||||
| }, | |||||
| show: function() { | |||||
| var offset = $(input).offset(); | |||||
| element.css({ | |||||
| width: typeof options.width == "string" || options.width > 0 ? options.width : $(input).width(), | |||||
| top: offset.top + input.offsetHeight, | |||||
| left: offset.left | |||||
| }).show(); | |||||
| if(options.scroll) { | |||||
| list.scrollTop(0); | |||||
| list.css({ | |||||
| maxHeight: options.scrollHeight, | |||||
| overflow: 'auto' | |||||
| }); | |||||
| if($.browser.msie && typeof document.body.style.maxHeight === "undefined") { | |||||
| var listHeight = 0; | |||||
| listItems.each(function() { | |||||
| listHeight += this.offsetHeight; | |||||
| }); | |||||
| var scrollbarsVisible = listHeight > options.scrollHeight; | |||||
| list.css('height', scrollbarsVisible ? options.scrollHeight : listHeight ); | |||||
| if (!scrollbarsVisible) { | |||||
| // IE doesn't recalculate width when scrollbar disappears | |||||
| listItems.width( list.width() - parseInt(listItems.css("padding-left")) - parseInt(listItems.css("padding-right")) ); | |||||
| } | |||||
| } | |||||
| } | |||||
| }, | |||||
| selected: function() { | |||||
| var selected = listItems && listItems.filter("." + CLASSES.ACTIVE).removeClass(CLASSES.ACTIVE); | |||||
| return selected && selected.length && $.data(selected[0], "ac_data"); | |||||
| }, | |||||
| emptyList: function (){ | |||||
| list && list.empty(); | |||||
| }, | |||||
| unbind: function() { | |||||
| element && element.remove(); | |||||
| } | |||||
| }; | |||||
| }; | |||||
| $.Autocompleter.Selection = function(field, start, end) { | |||||
| if( field.createTextRange ){ | |||||
| var selRange = field.createTextRange(); | |||||
| selRange.collapse(true); | |||||
| selRange.moveStart("character", start); | |||||
| selRange.moveEnd("character", end); | |||||
| selRange.select(); | |||||
| } else if( field.setSelectionRange ){ | |||||
| field.setSelectionRange(start, end); | |||||
| } else { | |||||
| if( field.selectionStart ){ | |||||
| field.selectionStart = start; | |||||
| field.selectionEnd = end; | |||||
| } | |||||
| } | |||||
| field.focus(); | |||||
| }; | |||||
| })(jQuery); |
| /* | |||||
| * ---------------------------------------------------------------------------- | |||||
| * "THE BEER-WARE LICENSE" (Revision 42): | |||||
| * Tuxes3 wrote this file. As long as you retain this notice you | |||||
| * can do whatever you want with this stuff. If we meet some day, and you think | |||||
| * this stuff is worth it, you can buy me a beer in return Tuxes3 | |||||
| * ---------------------------------------------------------------------------- | |||||
| */ | |||||
(function($)
{
    /**
     * jQuery plugin that turns a list into a weighted tag cloud.
     *
     * Adds the class "tx3-tag-cloud" to the matched element(s) and sets the
     * font size of every contained <li> according to its numeric
     * "data-weight" attribute: the lowest weight gets 1em, the highest gets
     * (1 + multiplier)em, everything else is scaled linearly in between.
     *
     * @param {Object} [options]
     * @param {number} [options.multiplier=1] extra size (in em) given to the
     *        heaviest item on top of the 1em base size
     * @returns {jQuery} the matched set, so calls can be chained
     */
    $.fn.tx3TagCloud = function(options)
    {
        //
        // DEFAULT SETTINGS
        //
        // Kept local to the call so that two clouds with different options
        // never see each other's settings.
        var settings = $.extend({
            multiplier : 1
        }, options);
        main(this, settings);
        return this;
    };

    /**
     * marks the element as a tag cloud and sizes its list items
     */
    function main(element, settings)
    {
        // adding style attr
        element.addClass("tx3-tag-cloud");
        addListElementFontSize(element, settings);
    }

    /**
     * calculates the font size on each li element
     * according to their data-weight attribute
     */
    function addListElementFontSize(element, settings)
    {
        var items = element.find("li");
        var hDataWeight = -Infinity;
        var lDataWeight = Infinity;

        // First pass: find the lowest and highest weight.
        $.each(items, function(){
            var cDataWeight = getDataWeight(this);
            // parseInt yields NaN (never undefined) for a missing or
            // non-numeric attribute, so NaN is what has to be tested.
            if (isNaN(cDataWeight))
            {
                logWarning("No \"data-weight\" attribut defined on <li> element");
            }
            else
            {
                hDataWeight = cDataWeight > hDataWeight ? cDataWeight : hDataWeight;
                lDataWeight = cDataWeight < lDataWeight ? cDataWeight : lDataWeight;
            }
        });

        var range = hDataWeight - lDataWeight;

        // Second pass: scale each weighted item between 1em and (1 + multiplier)em.
        $.each(items, function(){
            var dataWeight = getDataWeight(this);
            if (isNaN(dataWeight))
            {
                // No usable weight: leave the item's font size untouched
                // instead of writing "NaNem".
                return;
            }
            // When every item has the same weight the range is 0; use the
            // base size rather than dividing by zero.
            var percent = range > 0 ? (dataWeight - lDataWeight) / range : 0;
            $(this).css('font-size', (1 + (percent * settings['multiplier'])) + "em");
        });
    }

    /**
     * reads the data-weight attribute of an element as a base-10 integer;
     * returns NaN when the attribute is missing or not numeric
     */
    function getDataWeight(element)
    {
        return parseInt($(element).attr("data-weight"), 10);
    }

    /**
     * writes a timestamped warning to the browser console
     */
    function logWarning(message)
    {
        console.log("[WARNING] " + Date.now() + " : " + message);
    }
}(jQuery));
## Page layout template: wraps the rendered page body ($content) with the shared head, header and footer.
## - head.vm and footer.vm are included with #parse.
## - The locale menu offers fr_FR, de_DE and the default (empty) locale; the entry matching #current_locale gets class="hidden".
## - When $response.response.error.code is set, an error box with the code and message is rendered instead of $content.
## NOTE(review): locale_select() is not defined in this template - presumably provided by a script loaded from head.vm; confirm.
| <html> | |||||
| <head> | |||||
| #parse("head.vm") | |||||
| </head> | |||||
| <body> | |||||
| <div id="header"> | |||||
| <a href="#url_for_home"><img src="#{url_root}/img/solr.svg" id="logo" title="Solr"/></a> $resource.powered_file_search | |||||
| </div> | |||||
| <div id="header2" onclick="javascript:locale_select()"> | |||||
| <ul class="menu"> | |||||
| <li> | |||||
| <a href="#"><img src="#{url_for_solr}/admin/file?file=/velocity/img/globe_256.png&contentType=image/png" id="locale_pic" title="locale_select" width="30px" height="27px"/></a> | |||||
| <ul> | |||||
| <li><a href="#url_for_locale('fr_FR')" #if("#current_locale"=="fr_FR")class="hidden"#end> | |||||
| <img src="#{url_for_solr}/admin/file?file=/velocity/img/france_640.png&contentType=image/png" id="french_flag" width="40px" height="40px"/>Français</a></li> | |||||
| <li><a href="#url_for_locale('de_DE')" #if("#current_locale"=="de_DE")class="hidden"#end> | |||||
| <img src="#{url_for_solr}/admin/file?file=/velocity/img/germany_640.png&contentType=image/png" id="german_flag" width="40px" height="40px"/>Deutsch</a></li> | |||||
| <li><a href="#url_for_locale('')" #if("#current_locale"=="")class="hidden"#end> | |||||
| <img src="#{url_for_solr}/admin/file?file=/velocity/img/english_640.png&contentType=image/png" id="english_flag" width="40px" height="40px"/>English</a></li> | |||||
| </ul> | |||||
| </li> | |||||
| </ul> | |||||
| </div> | |||||
## Show the Solr error (code + message) in place of the page body when the response carries one.
| #if($response.response.error.code) | |||||
| <div class="error"> | |||||
| <h1>ERROR $response.response.error.code</h1> | |||||
| $response.response.error.msg | |||||
| </div> | |||||
| #else | |||||
| <div id="content"> | |||||
| $content | |||||
| </div> | |||||
| #end | |||||
| <div id="footer"> | |||||
| #parse("footer.vm") | |||||
| </div> | |||||
| </body> | |||||
| </html> |
## URL-building macros. Each "lens" macro emits a query-string fragment; they rely on #fqs, #sort, #q and #url_for_home, which are defined outside this section.
## lensFilterSortOnly: emits "?", then "&" plus the fq parameters from the response header (only when at least one is present), then the sort fragment built from the request's 'sort' parameters.
| #macro(lensFilterSortOnly)?#if($response.responseHeader.params.getAll("fq").size() > 0)&#fqs($response.responseHeader.params.getAll("fq"))#end#sort($request.params.getParams('sort'))#end | |||||
## lensNoQ: filter/sort fragment plus "&type=<current type>" and, when a locale is set, "&locale=<current locale>" - everything except the query itself.
| #macro(lensNoQ)#lensFilterSortOnly&type=#current_type#if("#current_locale"!="")&locale=#current_locale#end#end | |||||
## lensNoType: filter/sort fragment plus the query and optional locale - everything except the type.
| #macro(lensNoType)#lensFilterSortOnly#q#if("#current_locale"!="")&locale=#current_locale#end#end | |||||
## lensNoLocale: filter/sort fragment plus the query and the type - everything except the locale.
| #macro(lensNoLocale)#lensFilterSortOnly#q&type=#current_type#end | |||||
| ## lens modified for example/files - to use fq from responseHeader rather than request, and #debug removed too as it is built into browse params now, also added type to lens | |||||
| #macro(lens)#lensNoQ#q#end | |||||
| ## Macros defined custom for the "files" example | |||||
## url_for_type: home URL with the current query, filters, sort and locale, switched to the given $type.
| #macro(url_for_type $type)#url_for_home#lensNoType&type=$type#end | |||||
## current_type: the "type" parameter from the response header, or "all" when it is not set.
| #macro(current_type)#if($response.responseHeader.params.type)${response.responseHeader.params.type}#{else}all#end#end | |||||
## url_for_locale: home URL with the current query, filters, sort and type, an optional "&locale=$locale" (omitted for the empty locale) and the current page start offset.
| #macro(url_for_locale $locale)#url_for_home#lensNoLocale#if($locale!="")&locale=$locale#end&start=$page.start#end | |||||
## current_locale: the "locale" parameter from the response header; the quiet reference ($!) renders nothing when it is not set.
| #macro(current_locale)$!{response.responseHeader.params.locale}#end | |||||
| ## Usage: #label(resource_key[, default_value]) - resource_key is used as label if no default value specified and no resource exists | |||||
## label: emits the resource for $key when it exists, otherwise $default when given, otherwise the key itself.
| #macro(label $key $default)#if($resource.get($key).exists)${resource.get($key)}#else#if($default)$default#else${key}#end#end#end |
| #** | |||||
| * Define some Mime-Types, short and long form | |||||
| *# | |||||
| ## MimeType to extension map for detecting file type | |||||
| ## and showing proper icon | |||||
| ## The list of types matches the icons in /solr/img/filetypes | |||||
| ## Short MimeType Names | |||||
| ## Was called $supportedtypes | |||||
## $supportedMimeTypes: a single semicolon-separated string of short type names (not a list object).
| #set($supportedMimeTypes = "7z;ai;aiff;asc;audio;bin;bz2;c;cfc;cfm;chm;class;conf;cpp;cs;css;csv;deb;divx;doc;dot;eml;enc;file;gif;gz;hlp;htm;html;image;iso;jar;java;jpeg;jpg;js;lua;m;mm;mov;mp3;mpg;odc;odf;odg;odi;odp;ods;odt;ogg;pdf;pgp;php;pl;png;ppt;ps;py;ram;rar;rb;rm;rpm;rtf;sig;sql;swf;sxc;sxd;sxi;sxw;tar;tex;tgz;txt;vcf;video;vsd;wav;wma;wmv;xls;xml;xpi;xvid;zip") | |||||
| ## Long Form: map MimeType headers to our Short names | |||||
| ## Was called $extMap | |||||
## $mimeExtensionsMap: keys are full MIME type strings, values are short names; every value used here also appears in $supportedMimeTypes.
## Not every short name has a MIME type entry (e.g. "php", "py", "mp3" are listed above but have no key below).
## NOTE(review): "application/postscript" maps to "ai" although "ps" is also a supported short name - confirm this is intended.
| #set( $mimeExtensionsMap = { | |||||
| "application/x-7z-compressed": "7z", | |||||
| "application/postscript": "ai", | |||||
| "application/pgp-signature": "asc", | |||||
| "application/octet-stream": "bin", | |||||
| "application/x-bzip2": "bz2", | |||||
| "text/x-c": "c", | |||||
| "application/vnd.ms-htmlhelp": "chm", | |||||
| "application/java-vm": "class", | |||||
| "text/css": "css", | |||||
| "text/csv": "csv", | |||||
| "application/x-debian-package": "deb", | |||||
| "application/msword": "doc", | |||||
| "message/rfc822": "eml", | |||||
| "image/gif": "gif", | |||||
| "application/winhlp": "hlp", | |||||
| "text/html": "html", | |||||
| "application/java-archive": "jar", | |||||
| "text/x-java-source": "java", | |||||
| "image/jpeg": "jpeg", | |||||
| "application/javascript": "js", | |||||
| "application/vnd.oasis.opendocument.chart": "odc", | |||||
| "application/vnd.oasis.opendocument.formula": "odf", | |||||
| "application/vnd.oasis.opendocument.graphics": "odg", | |||||
| "application/vnd.oasis.opendocument.image": "odi", | |||||
| "application/vnd.oasis.opendocument.presentation": "odp", | |||||
| "application/vnd.oasis.opendocument.spreadsheet": "ods", | |||||
| "application/vnd.oasis.opendocument.text": "odt", | |||||
| "application/pdf": "pdf", | |||||
| "application/pgp-encrypted": "pgp", | |||||
| "image/png": "png", | |||||
| "application/vnd.ms-powerpoint": "ppt", | |||||
| "audio/x-pn-realaudio": "ram", | |||||
| "application/x-rar-compressed": "rar", | |||||
| "application/vnd.rn-realmedia": "rm", | |||||
| "application/rtf": "rtf", | |||||
| "application/x-shockwave-flash": "swf", | |||||
| "application/vnd.sun.xml.calc": "sxc", | |||||
| "application/vnd.sun.xml.draw": "sxd", | |||||
| "application/vnd.sun.xml.impress": "sxi", | |||||
| "application/vnd.sun.xml.writer": "sxw", | |||||
| "application/x-tar": "tar", | |||||
| "application/x-tex": "tex", | |||||
| "text/plain": "txt", | |||||
| "text/x-vcard": "vcf", | |||||
| "application/vnd.visio": "vsd", | |||||
| "audio/x-wav": "wav", | |||||
| "audio/x-ms-wma": "wma", | |||||
| "video/x-ms-wmv": "wmv", | |||||
| "application/vnd.ms-excel": "xls", | |||||
| "application/xml": "xml", | |||||
| "application/x-xpinstall": "xpi", | |||||
| "application/zip": "zip" | |||||
| }) |
## Search page body: the facet panel (facets.vm) followed by the result list (results_list.vm).
## The result list is framed by two pagination bars:
## - top: result count, the results_found_in resource filled with the response QTime, and "page X of Y";
## - bottom: previous/next page links around the result count and "page X of Y".
| <div id="facets"> | |||||
| #parse("facets.vm") | |||||
| </div> | |||||
| <div id="results_list"> | |||||
| <div class="pagination"> | |||||
| <span class="results-found">$page.results_found</span> $resource.results_found_in.insert(${response.responseHeader.QTime}) | |||||
| $resource.page_of.insert($page.current_page_number,$page.page_count) | |||||
| </div> | |||||
| #parse("results_list.vm") | |||||
| <div class="pagination"> | |||||
## #link_to_previous_page / #link_to_next_page are macros defined outside this template.
| #link_to_previous_page | |||||
| <span class="results-found">$page.results_found</span> $resource.results_found. | |||||
| $resource.page_of.insert($page.current_page_number,$page.page_count) | |||||
| #link_to_next_page | |||||
| </div> | |||||
| </div> |
## Document-type tabs: one "all" tab, then one tab per doc_type facet value.
## Each tab links to the same search restricted to that type (#url_for_type) and shows the facet count.
## Facet entries without a key are shown as a single "unknown" tab, and only when their count is above zero.
<ul id="tabs">
<li><a href="#url_for_type('all')" #if("#current_type"=="all")class="selected"#end>$resource.type.all ($response.response.facet_counts.facet_queries.all_types)</a></li>
#foreach($type in $response.response.facet_counts.facet_fields.doc_type)
#if($type.key)
## Emit ONE class attribute. A tab can be both empty and selected; two separate
## class="..." attributes would make the browser keep only the first one.
<li><a href="#url_for_type($type.key)" #if($type.value=="0" || "#current_type"==$type.key)class="#if($type.value=="0")no_results#end#if($type.value=="0" && "#current_type"==$type.key) #end#if("#current_type"==$type.key)selected#end"#end> #label("type.${type.key}.label", $type.key) ($type.value)</a></li>
#else
#if($type.value > 0)
<li><a href="#url_for_type('unknown')" #if("#current_type"=="unknown")class="selected"#end>$resource.type.unknown ($type.value)</a></li>
#end
#end
#end
</ul>
## Result list: renders hit.vm once for every document in $response.results.
## NOTE(review): hit.vm presumably reads the loop variable $doc - confirm against hit.vm.
| <div id="results"> | |||||
| #foreach($doc in $response.results) | |||||
| #parse("hit.vm") | |||||
| #end | |||||
| </div> | |||||
| Import() | Import() | ||||
| { | { | ||||
| docker exec -it solr solr create_core -c $core | |||||
| docker exec -it solr solr create_core -c $core -d custom | |||||
| docker exec -ti --user=solr solr bash -c "cp -r /opt/solr/example/files/conf/* /var/solr/data/$core/conf/" | |||||
| #docker exec -ti --user=solr solr bash -c "cp -r /opt/solr/example/files/conf/* /var/solr/data/$core/conf/" | |||||
| docker restart solr | docker restart solr | ||||
| exit;; | exit;; | ||||
| z) # index all | z) # index all | ||||
| core="all" | core="all" | ||||
| location="data/2018 (10381)" | |||||
| location="data/pop_rtfs" | |||||
| Import | Import | ||||
| exit;; | exit;; | ||||
| a) # index ACTIVE folder | a) # index ACTIVE folder |