- '8983:8983' | - '8983:8983' | ||||
volumes: | volumes: | ||||
- solrdata:/var/solr | - solrdata:/var/solr | ||||
- ./solr_config:/opt/solr/server/solr/configsets/custom | |||||
volumes: | volumes: | ||||
solrdata: | solrdata: |
<br><br> | <br><br> | ||||
Year: | |||||
<?php | |||||
echo $result['year']; | |||||
?> | |||||
<br><br> | |||||
EPO publication: | EPO publication: | ||||
<a href=<?php echo $result['epo_publication_url']; ?>> | <a href=<?php echo $result['epo_publication_url']; ?>> |
<option value="multispecies">multi-species</option> | <option value="multispecies">multi-species</option> | ||||
<option value="surviving">surviving</option> | <option value="surviving">surviving</option> | ||||
</select> | </select> | ||||
sort by: | |||||
<select name="sort" id="sort"> | |||||
<option value="relevance">relevance</option> | |||||
<option value="year">year</option> | |||||
</select> | |||||
<input type="submit" id="submit" value="search"> | <input type="submit" id="submit" value="search"> | ||||
</form> | </form> | ||||
</div> | </div> |
<?php | <?php | ||||
$search_results = solr_search($_POST["search"], $_POST["searchopt"]); | |||||
$search_results = solr_search($_POST["search"], $_POST["searchopt"], $_POST["sort"]); | |||||
if(is_array($search_results)): | if(is_array($search_results)): | ||||
<br><br> | <br><br> | ||||
Year: | |||||
<?php | |||||
echo $result['year']; | |||||
?> | |||||
<br><br> | |||||
EPO publication: | EPO publication: | ||||
<a href=<?php echo $result['epo_publication_url']; ?>> | <a href=<?php echo $result['epo_publication_url']; ?>> | ||||
<br><br> | <br><br> | ||||
<?php | <?php | ||||
if ($result['abstract']): | |||||
if (isset($result['abstract'])): | |||||
?> | ?> | ||||
Abstract: | Abstract: |
<?php | <?php | ||||
function solr_search($search, $core){ | |||||
function solr_search($search, $core, $sort){ | |||||
// Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html | |||||
$solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json'; | |||||
if ($sort == 'relevance'){ | |||||
// Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html | |||||
$solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json'; | |||||
} | |||||
else{ | |||||
// Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html | |||||
$solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json&sort=' . $sort . '%20asc'; | |||||
} | |||||
// Perform Curl request on the Solr API | // Perform Curl request on the Solr API | ||||
$ch = curl_init(); | $ch = curl_init(); | ||||
elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract)) { | elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract)) { | ||||
$output['abstract'] = $abstract[1]; | $output['abstract'] = $abstract[1]; | ||||
} | } | ||||
// Search for the year in the content element and display it | |||||
if (preg_match('/=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})/', $input, $year)){ | |||||
$output['year'] = $year[1]; | |||||
} | |||||
return $output; | return $output; | ||||
} | } | ||||
<?xml version="1.0" ?> | |||||
<!-- | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
--> | |||||
<!-- Example exchange rates file for CurrencyField type named "currency" in example schema --> | |||||
<currencyConfig version="1.0"> | |||||
<rates> | |||||
<!-- Updated from http://www.exchangerate.com/ at 2011-09-27 --> | |||||
<rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" /> | |||||
<rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" /> | |||||
<rate from="USD" to="EUR" rate="0.743676" comment="European Euro" /> | |||||
<rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" /> | |||||
<rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" /> | |||||
<rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" /> | |||||
<rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" /> | |||||
<rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" /> | |||||
<rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" /> | |||||
<rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" /> | |||||
<rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" /> | |||||
<rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" /> | |||||
<rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" /> | |||||
<rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" /> | |||||
<rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" /> | |||||
<rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" /> | |||||
<rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" /> | |||||
<rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" /> | |||||
<rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" /> | |||||
<rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" /> | |||||
<rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" /> | |||||
<rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" /> | |||||
<rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" /> | |||||
<rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" /> | |||||
<rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" /> | |||||
<rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" /> | |||||
<rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" /> | |||||
<rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" /> | |||||
<rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" /> | |||||
<rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" /> | |||||
<rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" /> | |||||
<rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" /> | |||||
<rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" /> | |||||
<rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" /> | |||||
<rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" /> | |||||
<rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" /> | |||||
<rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" /> | |||||
<!-- Cross-rates for some common currencies --> | |||||
<rate from="EUR" to="GBP" rate="0.869914" /> | |||||
<rate from="EUR" to="NOK" rate="7.800095" /> | |||||
<rate from="GBP" to="NOK" rate="8.966508" /> | |||||
</rates> | |||||
</currencyConfig> |
<?xml version="1.0" encoding="UTF-8" ?> | |||||
<!-- | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
--> | |||||
<!-- If this file is found in the config directory, it will only be | |||||
loaded once at startup. If it is found in Solr's data | |||||
directory, it will be re-loaded every commit. | |||||
See http://wiki.apache.org/solr/QueryElevationComponent for more info | |||||
--> | |||||
<elevate> | |||||
<!-- Query elevation examples | |||||
<query text="foo bar"> | |||||
<doc id="1" /> | |||||
<doc id="2" /> | |||||
<doc id="3" /> | |||||
</query> | |||||
for use with techproducts example | |||||
<query text="ipod"> | |||||
<doc id="MA147LL/A" /> put the actual ipod at the top | |||||
<doc id="IW-02" exclude="true" /> exclude this cable | |||||
</query> | |||||
--> | |||||
</elevate> |
<URL> | |||||
<EMAIL> |
# Set of Catalan contractions for ElisionFilter | |||||
# TODO: load this as a resource from the analyzer and sync it in build.xml | |||||
d | |||||
l | |||||
m | |||||
n | |||||
s | |||||
t |
# Set of French contractions for ElisionFilter | |||||
# TODO: load this as a resource from the analyzer and sync it in build.xml | |||||
l | |||||
m | |||||
t | |||||
qu | |||||
n | |||||
s | |||||
j | |||||
d | |||||
c | |||||
jusqu | |||||
quoiqu | |||||
lorsqu | |||||
puisqu |
# Set of Irish contractions for ElisionFilter | |||||
# TODO: load this as a resource from the analyzer and sync it in build.xml | |||||
d | |||||
m | |||||
b |
# Set of Italian contractions for ElisionFilter | |||||
# TODO: load this as a resource from the analyzer and sync it in build.xml | |||||
c | |||||
l | |||||
all | |||||
dall | |||||
dell | |||||
nell | |||||
sull | |||||
coll | |||||
pell | |||||
gl | |||||
agl | |||||
dagl | |||||
degl | |||||
negl | |||||
sugl | |||||
un | |||||
m | |||||
t | |||||
s | |||||
v | |||||
d |
# Set of Irish hyphenations for StopFilter | |||||
# TODO: load this as a resource from the analyzer and sync it in build.xml | |||||
h | |||||
n | |||||
t |
# Set of overrides for the dutch stemmer | |||||
# TODO: load this as a resource from the analyzer and sync it in build.xml | |||||
fiets fiets | |||||
bromfiets bromfiets | |||||
ei eier | |||||
kind kinder |
# | |||||
# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. | |||||
# | |||||
# Any token whose part-of-speech tag exactly matches one defined in this | |||||
# file is removed from the token stream. | |||||
# | |||||
# Set your own stoptags by uncommenting the lines below. Note that comments are | |||||
# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, | |||||
# etc. that can be useful for building your own stoptag set. | |||||
# | |||||
# The entire possible tagset is provided below for convenience. | |||||
# | |||||
##### | |||||
# noun: unclassified nouns | |||||
#名詞 | |||||
# | |||||
# noun-common: Common nouns or nouns where the sub-classification is undefined | |||||
#名詞-一般 | |||||
# | |||||
# noun-proper: Proper nouns where the sub-classification is undefined | |||||
#名詞-固有名詞 | |||||
# | |||||
# noun-proper-misc: miscellaneous proper nouns | |||||
#名詞-固有名詞-一般 | |||||
# | |||||
# noun-proper-person: Personal names where the sub-classification is undefined | |||||
#名詞-固有名詞-人名 | |||||
# | |||||
# noun-proper-person-misc: names that cannot be divided into surname and | |||||
# given name; foreign names; names where the surname or given name is unknown. | |||||
# e.g. お市の方 | |||||
#名詞-固有名詞-人名-一般 | |||||
# | |||||
# noun-proper-person-surname: Mainly Japanese surnames. | |||||
# e.g. 山田 | |||||
#名詞-固有名詞-人名-姓 | |||||
# | |||||
# noun-proper-person-given_name: Mainly Japanese given names. | |||||
# e.g. 太郎 | |||||
#名詞-固有名詞-人名-名 | |||||
# | |||||
# noun-proper-organization: Names representing organizations. | |||||
# e.g. 通産省, NHK | |||||
#名詞-固有名詞-組織 | |||||
# | |||||
# noun-proper-place: Place names where the sub-classification is undefined | |||||
#名詞-固有名詞-地域 | |||||
# | |||||
# noun-proper-place-misc: Place names excluding countries. | |||||
# e.g. アジア, バルセロナ, 京都 | |||||
#名詞-固有名詞-地域-一般 | |||||
# | |||||
# noun-proper-place-country: Country names. | |||||
# e.g. 日本, オーストラリア | |||||
#名詞-固有名詞-地域-国 | |||||
# | |||||
# noun-pronoun: Pronouns where the sub-classification is undefined | |||||
#名詞-代名詞 | |||||
# | |||||
# noun-pronoun-misc: miscellaneous pronouns: | |||||
# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ | |||||
#名詞-代名詞-一般 | |||||
# | |||||
# noun-pronoun-contraction: Spoken language contraction made by combining a | |||||
# pronoun and the particle 'wa'. | |||||
# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ | |||||
#名詞-代名詞-縮約 | |||||
# | |||||
# noun-adverbial: Temporal nouns such as names of days or months that behave | |||||
# like adverbs. Nouns that represent amount or ratios and can be used adverbially, | |||||
# e.g. 金曜, 一月, 午後, 少量 | |||||
#名詞-副詞可能 | |||||
# | |||||
# noun-verbal: Nouns that take arguments with case and can appear followed by | |||||
# 'suru' and related verbs (する, できる, なさる, くださる) | |||||
# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り | |||||
#名詞-サ変接続 | |||||
# | |||||
# noun-adjective-base: The base form of adjectives, words that appear before な ("na") | |||||
# e.g. 健康, 安易, 駄目, だめ | |||||
#名詞-形容動詞語幹 | |||||
# | |||||
# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. | |||||
# e.g. 0, 1, 2, 何, 数, 幾 | |||||
#名詞-数 | |||||
# | |||||
# noun-affix: noun affixes where the sub-classification is undefined | |||||
#名詞-非自立 | |||||
# | |||||
# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that | |||||
# attach to the base form of inflectional words, words that cannot be classified | |||||
# into any of the other categories below. This category includes indefinite nouns. | |||||
# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, | |||||
# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, | |||||
# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, | |||||
# わり, 割り, 割, ん-口語/, もん-口語/ | |||||
#名詞-非自立-一般 | |||||
# | |||||
# noun-affix-adverbial: noun affixes that can behave as adverbs. | |||||
# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, | |||||
# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, | |||||
# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, | |||||
# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, | |||||
# 儘, 侭, みぎり, 矢先 | |||||
#名詞-非自立-副詞可能 | |||||
# | |||||
# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars | |||||
# with the stem よう(だ) ("you(da)"). | |||||
# e.g. よう, やう, 様 (よう) | |||||
#名詞-非自立-助動詞語幹 | |||||
# | |||||
# noun-affix-adjective-base: noun affixes that can connect to the indeclinable | |||||
# connection form な (aux "da"). | |||||
# e.g. みたい, ふう | |||||
#名詞-非自立-形容動詞語幹 | |||||
# | |||||
# noun-special: special nouns where the sub-classification is undefined. | |||||
#名詞-特殊 | |||||
# | |||||
# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is | |||||
# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base | |||||
# form of inflectional words. | |||||
# e.g. そう | |||||
#名詞-特殊-助動詞語幹 | |||||
# | |||||
# noun-suffix: noun suffixes where the sub-classification is undefined. | |||||
#名詞-接尾 | |||||
# | |||||
# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect | |||||
# to ガル or タイ and can combine into compound nouns, words that cannot be classified into | |||||
# any of the other categories below. In general, this category is more inclusive than | |||||
# 接尾語 ("suffix") and is usually the last element in a compound noun. | |||||
# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, | |||||
# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 | |||||
#名詞-接尾-一般 | |||||
# | |||||
# noun-suffix-person: Suffixes that form nouns and attach to person names more often | |||||
# than other nouns. | |||||
# e.g. 君, 様, 著 | |||||
#名詞-接尾-人名 | |||||
# | |||||
# noun-suffix-place: Suffixes that form nouns and attach to place names more often | |||||
# than other nouns. | |||||
# e.g. 町, 市, 県 | |||||
#名詞-接尾-地域 | |||||
# | |||||
# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that | |||||
# can appear before スル ("suru"). | |||||
# e.g. 化, 視, 分け, 入り, 落ち, 買い | |||||
#名詞-接尾-サ変接続 | |||||
# | |||||
# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, | |||||
# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the | |||||
# conjunctive form of inflectional words. | |||||
# e.g. そう | |||||
#名詞-接尾-助動詞語幹 | |||||
# | |||||
# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive | |||||
# form of inflectional words and appear before the copula だ ("da"). | |||||
# e.g. 的, げ, がち | |||||
#名詞-接尾-形容動詞語幹 | |||||
# | |||||
# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. | |||||
# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) | |||||
#名詞-接尾-副詞可能 | |||||
# | |||||
# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category | |||||
# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach | |||||
# to numbers. | |||||
# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 | |||||
#名詞-接尾-助数詞 | |||||
# | |||||
# noun-suffix-special: Special suffixes that mainly attach to inflecting words. | |||||
# e.g. (楽し) さ, (考え) 方 | |||||
#名詞-接尾-特殊 | |||||
# | |||||
# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words | |||||
# together. | |||||
# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) | |||||
#名詞-接続詞的 | |||||
# | |||||
# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are | |||||
# semantically verb-like. | |||||
# e.g. ごらん, ご覧, 御覧, 頂戴 | |||||
#名詞-動詞非自立的 | |||||
# | |||||
# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, | |||||
# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") | |||||
# is いわく ("iwaku"). | |||||
#名詞-引用文字列 | |||||
# | |||||
# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and | |||||
# behave like an adjective. | |||||
# e.g. 申し訳, 仕方, とんでも, 違い | |||||
#名詞-ナイ形容詞語幹 | |||||
# | |||||
##### | |||||
# prefix: unclassified prefixes | |||||
#接頭詞 | |||||
# | |||||
# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) | |||||
# excluding numerical expressions. | |||||
# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) | |||||
#接頭詞-名詞接続 | |||||
# | |||||
# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb | |||||
# in conjunctive form followed by なる/なさる/くださる. | |||||
# e.g. お (読みなさい), お (座り) | |||||
#接頭詞-動詞接続 | |||||
# | |||||
# prefix-adjectival: Prefixes that attach to adjectives. | |||||
# e.g. お (寒いですねえ), バカ (でかい) | |||||
#接頭詞-形容詞接続 | |||||
# | |||||
# prefix-numerical: Prefixes that attach to numerical expressions. | |||||
# e.g. 約, およそ, 毎時 | |||||
#接頭詞-数接続 | |||||
# | |||||
##### | |||||
# verb: unclassified verbs | |||||
#動詞 | |||||
# | |||||
# verb-main: | |||||
#動詞-自立 | |||||
# | |||||
# verb-auxiliary: | |||||
#動詞-非自立 | |||||
# | |||||
# verb-suffix: | |||||
#動詞-接尾 | |||||
# | |||||
##### | |||||
# adjective: unclassified adjectives | |||||
#形容詞 | |||||
# | |||||
# adjective-main: | |||||
#形容詞-自立 | |||||
# | |||||
# adjective-auxiliary: | |||||
#形容詞-非自立 | |||||
# | |||||
# adjective-suffix: | |||||
#形容詞-接尾 | |||||
# | |||||
##### | |||||
# adverb: unclassified adverbs | |||||
#副詞 | |||||
# | |||||
# adverb-misc: Words that can be segmented into one unit and where adnominal | |||||
# modification is not possible. | |||||
# e.g. あいかわらず, 多分 | |||||
#副詞-一般 | |||||
# | |||||
# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, | |||||
# な, する, だ, etc. | |||||
# e.g. こんなに, そんなに, あんなに, なにか, なんでも | |||||
#副詞-助詞類接続 | |||||
# | |||||
##### | |||||
# adnominal: Words that only have noun-modifying forms. | |||||
# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, | |||||
# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, | |||||
# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き | |||||
#連体詞 | |||||
# | |||||
##### | |||||
# conjunction: Conjunctions that can occur independently. | |||||
# e.g. が, けれども, そして, じゃあ, それどころか | |||||
接続詞 | |||||
# | |||||
##### | |||||
# particle: unclassified particles. | |||||
助詞 | |||||
# | |||||
# particle-case: case particles where the subclassification is undefined. | |||||
助詞-格助詞 | |||||
# | |||||
# particle-case-misc: Case particles. | |||||
# e.g. から, が, で, と, に, へ, より, を, の, にて | |||||
助詞-格助詞-一般 | |||||
# | |||||
# particle-case-quote: the "to" that appears after nouns, a person’s speech, | |||||
# quotation marks, expressions of decisions from a meeting, reasons, judgements, | |||||
# conjectures, etc. | |||||
# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) | |||||
助詞-格助詞-引用 | |||||
# | |||||
# particle-case-compound: Compounds of particles and verbs that mainly behave | |||||
# like case particles. | |||||
# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, | |||||
# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, | |||||
# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, | |||||
# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, | |||||
# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, | |||||
# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, | |||||
# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, | |||||
# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ | |||||
助詞-格助詞-連語 | |||||
# | |||||
# particle-conjunctive: | |||||
# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, | |||||
# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, | |||||
# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ | |||||
助詞-接続助詞 | |||||
# | |||||
# particle-dependency: | |||||
# e.g. こそ, さえ, しか, すら, は, も, ぞ | |||||
助詞-係助詞 | |||||
# | |||||
# particle-adverbial: | |||||
# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, | |||||
# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, | |||||
# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, | |||||
# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, | |||||
# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) | |||||
助詞-副助詞 | |||||
# | |||||
# particle-interjective: particles with interjective grammatical roles. | |||||
# e.g. (松島) や | |||||
助詞-間投助詞 | |||||
# | |||||
# particle-coordinate: | |||||
# e.g. と, たり, だの, だり, とか, なり, や, やら | |||||
助詞-並立助詞 | |||||
# | |||||
# particle-final: | |||||
# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, | |||||
# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ | |||||
助詞-終助詞 | |||||
# | |||||
# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is | |||||
# adverbial, conjunctive, or sentence final. For example: | |||||
# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 | |||||
# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 | |||||
# 「(祈りが届いたせい) か (, 試験に合格した.)」 | |||||
# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 | |||||
# e.g. か | |||||
助詞-副助詞/並立助詞/終助詞 | |||||
# | |||||
# particle-adnominalizer: The "no" that attaches to nouns and modifies | |||||
# non-inflectional words. | |||||
助詞-連体化 | |||||
# | |||||
# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs | |||||
# that are giongo, giseigo, or gitaigo. | |||||
# e.g. に, と | |||||
助詞-副詞化 | |||||
# | |||||
# particle-special: A particle that does not fit into one of the above classifications. | |||||
# This includes particles that are used in Tanka, Haiku, and other poetry. | |||||
# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) | |||||
助詞-特殊 | |||||
# | |||||
##### | |||||
# auxiliary-verb: | |||||
助動詞 | |||||
# | |||||
##### | |||||
# interjection: Greetings and other exclamations. | |||||
# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, | |||||
# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい | |||||
#感動詞 | |||||
# | |||||
##### | |||||
# symbol: unclassified Symbols. | |||||
記号 | |||||
# | |||||
# symbol-misc: A general symbol not in one of the categories below. | |||||
# e.g. [○◎@$〒→+] | |||||
記号-一般 | |||||
# | |||||
# symbol-comma: Commas | |||||
# e.g. [,、] | |||||
記号-読点 | |||||
# | |||||
# symbol-period: Periods and full stops. | |||||
# e.g. [..。] | |||||
記号-句点 | |||||
# | |||||
# symbol-space: Full-width whitespace. | |||||
記号-空白 | |||||
# | |||||
# symbol-open_bracket: | |||||
# e.g. [({‘“『【] | |||||
記号-括弧開 | |||||
# | |||||
# symbol-close_bracket: | |||||
# e.g. [)}’”』」】] | |||||
記号-括弧閉 | |||||
# | |||||
# symbol-alphabetic: | |||||
#記号-アルファベット | |||||
# | |||||
##### | |||||
# other: unclassified other | |||||
#その他 | |||||
# | |||||
# other-interjection: Words that are hard to classify as noun-suffixes or | |||||
# sentence-final particles. | |||||
# e.g. (だ)ァ | |||||
その他-間投 | |||||
# | |||||
##### | |||||
# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. | |||||
# e.g. あの, うんと, えと | |||||
フィラー | |||||
# | |||||
##### | |||||
# non-verbal: non-verbal sound. | |||||
非言語音 | |||||
# | |||||
##### | |||||
# fragment: | |||||
#語断片 | |||||
# | |||||
##### | |||||
# unknown: unknown part of speech. | |||||
#未知語 | |||||
# | |||||
##### End of file |
# This file was created by Jacques Savoy and is distributed under the BSD license. | |||||
# See http://members.unine.ch/jacques.savoy/clef/index.html. | |||||
# Also see http://www.opensource.org/licenses/bsd-license.html | |||||
# Cleaned on October 11, 2009 (not normalized, so use before normalization) | |||||
# This means that when modifying this list, you might need to add some | |||||
# redundant entries, for example containing forms with both أ and ا | |||||
من | |||||
ومن | |||||
منها | |||||
منه | |||||
في | |||||
وفي | |||||
فيها | |||||
فيه | |||||
و | |||||
ف | |||||
ثم | |||||
او | |||||
أو | |||||
ب | |||||
بها | |||||
به | |||||
ا | |||||
أ | |||||
اى | |||||
اي | |||||
أي | |||||
أى | |||||
لا | |||||
ولا | |||||
الا | |||||
ألا | |||||
إلا | |||||
لكن | |||||
ما | |||||
وما | |||||
كما | |||||
فما | |||||
عن | |||||
مع | |||||
اذا | |||||
إذا | |||||
ان | |||||
أن | |||||
إن | |||||
انها | |||||
أنها | |||||
إنها | |||||
انه | |||||
أنه | |||||
إنه | |||||
بان | |||||
بأن | |||||
فان | |||||
فأن | |||||
وان | |||||
وأن | |||||
وإن | |||||
التى | |||||
التي | |||||
الذى | |||||
الذي | |||||
الذين | |||||
الى | |||||
الي | |||||
إلى | |||||
إلي | |||||
على | |||||
عليها | |||||
عليه | |||||
اما | |||||
أما | |||||
إما | |||||
ايضا | |||||
أيضا | |||||
كل | |||||
وكل | |||||
لم | |||||
ولم | |||||
لن | |||||
ولن | |||||
هى | |||||
هي | |||||
هو | |||||
وهى | |||||
وهي | |||||
وهو | |||||
فهى | |||||
فهي | |||||
فهو | |||||
انت | |||||
أنت | |||||
لك | |||||
لها | |||||
له | |||||
هذه | |||||
هذا | |||||
تلك | |||||
ذلك | |||||
هناك | |||||
كانت | |||||
كان | |||||
يكون | |||||
تكون | |||||
وكانت | |||||
وكان | |||||
غير | |||||
بعض | |||||
قد | |||||
نحو | |||||
بين | |||||
بينما | |||||
منذ | |||||
ضمن | |||||
حيث | |||||
الان | |||||
الآن | |||||
خلال | |||||
بعد | |||||
قبل | |||||
حتى | |||||
عند | |||||
عندما | |||||
لدى | |||||
جميع |
# This file was created by Jacques Savoy and is distributed under the BSD license. | |||||
# See http://members.unine.ch/jacques.savoy/clef/index.html. | |||||
# Also see http://www.opensource.org/licenses/bsd-license.html | |||||
а | |||||
аз | |||||
ако | |||||
ала | |||||
бе | |||||
без | |||||
беше | |||||
би | |||||
бил | |||||
била | |||||
били | |||||
било | |||||
близо | |||||
бъдат | |||||
бъде | |||||
бяха | |||||
в | |||||
вас | |||||
ваш | |||||
ваша | |||||
вероятно | |||||
вече | |||||
взема | |||||
ви | |||||
вие | |||||
винаги | |||||
все | |||||
всеки | |||||
всички | |||||
всичко | |||||
всяка | |||||
във | |||||
въпреки | |||||
върху | |||||
г | |||||
ги | |||||
главно | |||||
го | |||||
д | |||||
да | |||||
дали | |||||
до | |||||
докато | |||||
докога | |||||
дори | |||||
досега | |||||
доста | |||||
е | |||||
едва | |||||
един | |||||
ето | |||||
за | |||||
зад | |||||
заедно | |||||
заради | |||||
засега | |||||
затова | |||||
защо | |||||
защото | |||||
и | |||||
из | |||||
или | |||||
им | |||||
има | |||||
имат | |||||
иска | |||||
й | |||||
каза | |||||
как | |||||
каква | |||||
какво | |||||
както | |||||
какъв | |||||
като | |||||
кога | |||||
когато | |||||
което | |||||
които | |||||
кой | |||||
който | |||||
колко | |||||
която | |||||
къде | |||||
където | |||||
към | |||||
ли | |||||
м | |||||
ме | |||||
между | |||||
мен | |||||
ми | |||||
мнозина | |||||
мога | |||||
могат | |||||
може | |||||
моля | |||||
момента | |||||
му | |||||
н | |||||
на | |||||
над | |||||
назад | |||||
най | |||||
направи | |||||
напред | |||||
например | |||||
нас | |||||
не | |||||
него | |||||
нея | |||||
ни | |||||
ние | |||||
никой | |||||
нито | |||||
но | |||||
някои | |||||
някой | |||||
няма | |||||
обаче | |||||
около | |||||
освен | |||||
особено | |||||
от | |||||
отгоре | |||||
отново | |||||
още | |||||
пак | |||||
по | |||||
повече | |||||
повечето | |||||
под | |||||
поне | |||||
поради | |||||
после | |||||
почти | |||||
прави | |||||
пред | |||||
преди | |||||
през | |||||
при | |||||
пък | |||||
първо | |||||
с | |||||
са | |||||
само | |||||
се | |||||
сега | |||||
си | |||||
скоро | |||||
след | |||||
сме | |||||
според | |||||
сред | |||||
срещу | |||||
сте | |||||
съм | |||||
със | |||||
също | |||||
т | |||||
тази | |||||
така | |||||
такива | |||||
такъв | |||||
там | |||||
твой | |||||
те | |||||
тези | |||||
ти | |||||
тн | |||||
то | |||||
това | |||||
тогава | |||||
този | |||||
той | |||||
толкова | |||||
точно | |||||
трябва | |||||
тук | |||||
тъй | |||||
тя | |||||
тях | |||||
у | |||||
харесва | |||||
ч | |||||
че | |||||
често | |||||
чрез | |||||
ще | |||||
щом | |||||
я |
# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) | |||||
a | |||||
abans | |||||
ací | |||||
ah | |||||
així | |||||
això | |||||
al | |||||
als | |||||
aleshores | |||||
algun | |||||
alguna | |||||
algunes | |||||
alguns | |||||
alhora | |||||
allà | |||||
allí | |||||
allò | |||||
altra | |||||
altre | |||||
altres | |||||
amb | |||||
ambdós | |||||
ambdues | |||||
apa | |||||
aquell | |||||
aquella | |||||
aquelles | |||||
aquells | |||||
aquest | |||||
aquesta | |||||
aquestes | |||||
aquests | |||||
aquí | |||||
baix | |||||
cada | |||||
cadascú | |||||
cadascuna | |||||
cadascunes | |||||
cadascuns | |||||
com | |||||
contra | |||||
d'un | |||||
d'una | |||||
d'unes | |||||
d'uns | |||||
dalt | |||||
de | |||||
del | |||||
dels | |||||
des | |||||
després | |||||
dins | |||||
dintre | |||||
donat | |||||
doncs | |||||
durant | |||||
e | |||||
eh | |||||
el | |||||
els | |||||
em | |||||
en | |||||
encara | |||||
ens | |||||
entre | |||||
érem | |||||
eren | |||||
éreu | |||||
es | |||||
és | |||||
esta | |||||
està | |||||
estàvem | |||||
estaven | |||||
estàveu | |||||
esteu | |||||
et | |||||
etc | |||||
ets | |||||
fins | |||||
fora | |||||
gairebé | |||||
ha | |||||
han | |||||
has | |||||
havia | |||||
he | |||||
hem | |||||
heu | |||||
hi | |||||
ho | |||||
i | |||||
igual | |||||
iguals | |||||
ja | |||||
l'hi | |||||
la | |||||
les | |||||
li | |||||
li'n | |||||
llavors | |||||
m'he | |||||
ma | |||||
mal | |||||
malgrat | |||||
mateix | |||||
mateixa | |||||
mateixes | |||||
mateixos | |||||
me | |||||
mentre | |||||
més | |||||
meu | |||||
meus | |||||
meva | |||||
meves | |||||
molt | |||||
molta | |||||
moltes | |||||
molts | |||||
mon | |||||
mons | |||||
n'he | |||||
n'hi | |||||
ne | |||||
ni | |||||
no | |||||
nogensmenys | |||||
només | |||||
nosaltres | |||||
nostra | |||||
nostre | |||||
nostres | |||||
o | |||||
oh | |||||
oi | |||||
on | |||||
pas | |||||
pel | |||||
pels | |||||
per | |||||
però | |||||
perquè | |||||
poc | |||||
poca | |||||
pocs | |||||
poques | |||||
potser | |||||
propi | |||||
qual | |||||
quals | |||||
quan | |||||
quant | |||||
que | |||||
què | |||||
quelcom | |||||
qui | |||||
quin | |||||
quina | |||||
quines | |||||
quins | |||||
s'ha | |||||
s'han | |||||
sa | |||||
semblant | |||||
semblants | |||||
ses | |||||
seu | |||||
seus | |||||
seva | |||||
seva | |||||
seves | |||||
si | |||||
sobre | |||||
sobretot | |||||
sóc | |||||
solament | |||||
sols | |||||
son | |||||
són | |||||
sons | |||||
sota | |||||
sou | |||||
t'ha | |||||
t'han | |||||
t'he | |||||
ta | |||||
tal | |||||
també | |||||
tampoc | |||||
tan | |||||
tant | |||||
tanta | |||||
tantes | |||||
teu | |||||
teus | |||||
teva | |||||
teves | |||||
ton | |||||
tons | |||||
tot | |||||
tota | |||||
totes | |||||
tots | |||||
un | |||||
una | |||||
unes | |||||
uns | |||||
us | |||||
va | |||||
vaig | |||||
vam | |||||
van | |||||
vas | |||||
veu | |||||
vosaltres | |||||
vostra | |||||
vostre | |||||
vostres |
a | |||||
s | |||||
k | |||||
o | |||||
i | |||||
u | |||||
v | |||||
z | |||||
dnes | |||||
cz | |||||
tímto | |||||
budeš | |||||
budem | |||||
byli | |||||
jseš | |||||
můj | |||||
svým | |||||
ta | |||||
tomto | |||||
tohle | |||||
tuto | |||||
tyto | |||||
jej | |||||
zda | |||||
proč | |||||
máte | |||||
tato | |||||
kam | |||||
tohoto | |||||
kdo | |||||
kteří | |||||
mi | |||||
nám | |||||
tom | |||||
tomuto | |||||
mít | |||||
nic | |||||
proto | |||||
kterou | |||||
byla | |||||
toho | |||||
protože | |||||
asi | |||||
ho | |||||
naši | |||||
napište | |||||
re | |||||
což | |||||
tím | |||||
takže | |||||
svých | |||||
její | |||||
svými | |||||
jste | |||||
aj | |||||
tu | |||||
tedy | |||||
teto | |||||
bylo | |||||
kde | |||||
ke | |||||
pravé | |||||
ji | |||||
nad | |||||
nejsou | |||||
či | |||||
pod | |||||
téma | |||||
mezi | |||||
přes | |||||
ty | |||||
pak | |||||
vám | |||||
ani | |||||
když | |||||
však | |||||
neg | |||||
jsem | |||||
tento | |||||
článku | |||||
články | |||||
aby | |||||
jsme | |||||
před | |||||
pta | |||||
jejich | |||||
byl | |||||
ještě | |||||
až | |||||
bez | |||||
také | |||||
pouze | |||||
první | |||||
vaše | |||||
která | |||||
nás | |||||
nový | |||||
tipy | |||||
pokud | |||||
může | |||||
strana | |||||
jeho | |||||
své | |||||
jiné | |||||
zprávy | |||||
nové | |||||
není | |||||
vás | |||||
jen | |||||
podle | |||||
zde | |||||
už | |||||
být | |||||
více | |||||
bude | |||||
již | |||||
než | |||||
který | |||||
by | |||||
které | |||||
co | |||||
nebo | |||||
ten | |||||
tak | |||||
má | |||||
při | |||||
od | |||||
po | |||||
jsou | |||||
jak | |||||
další | |||||
ale | |||||
si | |||||
se | |||||
ve | |||||
to | |||||
jako | |||||
za | |||||
zpět | |||||
ze | |||||
do | |||||
pro | |||||
je | |||||
na | |||||
atd | |||||
atp | |||||
jakmile | |||||
přičemž | |||||
já | |||||
on | |||||
ona | |||||
ono | |||||
oni | |||||
ony | |||||
my | |||||
vy | |||||
jí | |||||
ji | |||||
mě | |||||
mne | |||||
jemu | |||||
tomu | |||||
těm | |||||
těmu | |||||
němu | |||||
němuž | |||||
jehož | |||||
jíž | |||||
jelikož | |||||
jež | |||||
jakož | |||||
načež |
| From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt | |||||
| This file is distributed under the BSD License. | |||||
| See http://snowball.tartarus.org/license.php | |||||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| - Encoding was converted to UTF-8. | |||||
| - This notice was added. | |||||
| | |||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| A Danish stop word list. Comments begin with vertical bar. Each stop | |||||
| word is at the start of a line. | |||||
| This is a ranked list (commonest to rarest) of stopwords derived from | |||||
| a large text sample. | |||||
og | and | |||||
i | in | |||||
jeg | I | |||||
det | that (dem. pronoun)/it (pers. pronoun) | |||||
at | that (in front of a sentence)/to (with infinitive) | |||||
en | a/an | |||||
den | it (pers. pronoun)/that (dem. pronoun) | |||||
til | to/at/for/until/against/by/of/into, more | |||||
er | present tense of "to be" | |||||
som | who, as | |||||
på | on/upon/in/on/at/to/after/of/with/for, on | |||||
de | they | |||||
med | with/by/in, along | |||||
han | he | |||||
af | of/by/from/off/for/in/with/on, off | |||||
for | at/for/to/from/by/of/ago, in front/before, because | |||||
ikke | not | |||||
der | who/which, there/those | |||||
var | past tense of "to be" | |||||
mig | me/myself | |||||
sig | oneself/himself/herself/itself/themselves | |||||
men | but | |||||
et | a/an/one, one (number), someone/somebody/one | |||||
har | present tense of "to have" | |||||
om | round/about/for/in/a, about/around/down, if | |||||
vi | we | |||||
min | my | |||||
havde | past tense of "to have" | |||||
ham | him | |||||
hun | she | |||||
nu | now | |||||
over | over/above/across/by/beyond/past/on/about, over/past | |||||
da | then, when/as/since | |||||
fra | from/off/since, off, since | |||||
du | you | |||||
ud | out | |||||
sin | his/her/its/one's | |||||
dem | them | |||||
os | us/ourselves | |||||
op | up | |||||
man | you/one | |||||
hans | his | |||||
hvor | where | |||||
eller | or | |||||
hvad | what | |||||
skal | must/shall etc. | |||||
selv | myself/youself/herself/ourselves etc., even | |||||
her | here | |||||
alle | all/everyone/everybody etc. | |||||
vil | will (verb) | |||||
blev | past tense of "to stay/to remain/to get/to become" | |||||
kunne | could | |||||
ind | in | |||||
når | when | |||||
være | present tense of "to be" | |||||
dog | however/yet/after all | |||||
noget | something | |||||
ville | would | |||||
jo | you know/you see (adv), yes | |||||
deres | their/theirs | |||||
efter | after/behind/according to/for/by/from, later/afterwards | |||||
ned | down | |||||
skulle | should | |||||
denne | this | |||||
end | than | |||||
dette | this | |||||
mit | my/mine | |||||
også | also | |||||
under | under/beneath/below/during, below/underneath | |||||
have | have | |||||
dig | you | |||||
anden | other | |||||
hende | her | |||||
mine | my | |||||
alt | everything | |||||
meget | much/very, plenty of | |||||
sit | his, her, its, one's | |||||
sine | his, her, its, one's | |||||
vor | our | |||||
mod | against | |||||
disse | these | |||||
hvis | if | |||||
din | your/yours | |||||
nogle | some | |||||
hos | by/at | |||||
blive | be/become | |||||
mange | many | |||||
ad | by/through | |||||
bliver | present tense of "to be/to become" | |||||
hendes | her/hers | |||||
været | be | |||||
thi | for (conj) | |||||
jer | you | |||||
sådan | such, like this/like that |
| From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt | |||||
| This file is distributed under the BSD License. | |||||
| See http://snowball.tartarus.org/license.php | |||||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| - Encoding was converted to UTF-8. | |||||
| - This notice was added. | |||||
| | |||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| A German stop word list. Comments begin with vertical bar. Each stop | |||||
| word is at the start of a line. | |||||
| The number of forms in this list is reduced significantly by passing it | |||||
| through the German stemmer. | |||||
aber | but | |||||
alle | all | |||||
allem | |||||
allen | |||||
aller | |||||
alles | |||||
als | than, as | |||||
also | so | |||||
am | an + dem | |||||
an | at | |||||
ander | other | |||||
andere | |||||
anderem | |||||
anderen | |||||
anderer | |||||
anderes | |||||
anderm | |||||
andern | |||||
anderr | |||||
anders | |||||
auch | also | |||||
auf | on | |||||
aus | out of | |||||
bei | by | |||||
bin | am | |||||
bis | until | |||||
bist | art | |||||
da | there | |||||
damit | with it | |||||
dann | then | |||||
der | the | |||||
den | |||||
des | |||||
dem | |||||
die | |||||
das | |||||
daß | that | |||||
derselbe | the same | |||||
derselben | |||||
denselben | |||||
desselben | |||||
demselben | |||||
dieselbe | |||||
dieselben | |||||
dasselbe | |||||
dazu | to that | |||||
dein | thy | |||||
deine | |||||
deinem | |||||
deinen | |||||
deiner | |||||
deines | |||||
denn | because | |||||
derer | of those | |||||
dessen | of him | |||||
dich | thee | |||||
dir | to thee | |||||
du | thou | |||||
dies | this | |||||
diese | |||||
diesem | |||||
diesen | |||||
dieser | |||||
dieses | |||||
doch | (several meanings) | |||||
dort | (over) there | |||||
durch | through | |||||
ein | a | |||||
eine | |||||
einem | |||||
einen | |||||
einer | |||||
eines | |||||
einig | some | |||||
einige | |||||
einigem | |||||
einigen | |||||
einiger | |||||
einiges | |||||
einmal | once | |||||
er | he | |||||
ihn | him | |||||
ihm | to him | |||||
es | it | |||||
etwas | something | |||||
euer | your | |||||
eure | |||||
eurem | |||||
euren | |||||
eurer | |||||
eures | |||||
für | for | |||||
gegen | towards | |||||
gewesen | p.p. of sein | |||||
hab | have | |||||
habe | have | |||||
haben | have | |||||
hat | has | |||||
hatte | had | |||||
hatten | had | |||||
hier | here | |||||
hin | there | |||||
hinter | behind | |||||
ich | I | |||||
mich | me | |||||
mir | to me | |||||
ihr | you, to her | |||||
ihre | |||||
ihrem | |||||
ihren | |||||
ihrer | |||||
ihres | |||||
euch | to you | |||||
im | in + dem | |||||
in | in | |||||
indem | while | |||||
ins | in + das | |||||
ist | is | |||||
jede | each, every | |||||
jedem | |||||
jeden | |||||
jeder | |||||
jedes | |||||
jene | that | |||||
jenem | |||||
jenen | |||||
jener | |||||
jenes | |||||
jetzt | now | |||||
kann | can | |||||
kein | no | |||||
keine | |||||
keinem | |||||
keinen | |||||
keiner | |||||
keines | |||||
können | can | |||||
könnte | could | |||||
machen | do | |||||
man | one | |||||
manche | some, many a | |||||
manchem | |||||
manchen | |||||
mancher | |||||
manches | |||||
mein | my | |||||
meine | |||||
meinem | |||||
meinen | |||||
meiner | |||||
meines | |||||
mit | with | |||||
muss | must | |||||
musste | had to | |||||
nach | to(wards) | |||||
nicht | not | |||||
nichts | nothing | |||||
noch | still, yet | |||||
nun | now | |||||
nur | only | |||||
ob | whether | |||||
oder | or | |||||
ohne | without | |||||
sehr | very | |||||
sein | his | |||||
seine | |||||
seinem | |||||
seinen | |||||
seiner | |||||
seines | |||||
selbst | self | |||||
sich | herself | |||||
sie | they, she | |||||
ihnen | to them | |||||
sind | are | |||||
so | so | |||||
solche | such | |||||
solchem | |||||
solchen | |||||
solcher | |||||
solches | |||||
soll | shall | |||||
sollte | should | |||||
sondern | but | |||||
sonst | else | |||||
über | over | |||||
um | about, around | |||||
und | and | |||||
uns | us | |||||
unse | |||||
unsem | |||||
unsen | |||||
unser | |||||
unses | |||||
unter | under | |||||
viel | much | |||||
vom | von + dem | |||||
von | from | |||||
vor | before | |||||
während | while | |||||
war | was | |||||
waren | were | |||||
warst | wast | |||||
was | what | |||||
weg | away, off | |||||
weil | because | |||||
weiter | further | |||||
welche | which | |||||
welchem | |||||
welchen | |||||
welcher | |||||
welches | |||||
wenn | when | |||||
werde | will | |||||
werden | will | |||||
wie | how | |||||
wieder | again | |||||
will | want | |||||
wir | we | |||||
wird | will | |||||
wirst | willst | |||||
wo | where | |||||
wollen | want | |||||
wollte | wanted | |||||
würde | would | |||||
würden | would | |||||
zu | to | |||||
zum | zu + dem | |||||
zur | zu + der | |||||
zwar | indeed | |||||
zwischen | between | |||||
# Lucene Greek Stopwords list | |||||
# Note: by default this file is used after GreekLowerCaseFilter, | |||||
# so when modifying this file use 'σ' instead of 'ς' | |||||
ο | |||||
η | |||||
το | |||||
οι | |||||
τα | |||||
του | |||||
τησ | |||||
των | |||||
τον | |||||
την | |||||
και | |||||
κι | |||||
κ | |||||
ειμαι | |||||
εισαι | |||||
ειναι | |||||
ειμαστε | |||||
ειστε | |||||
στο | |||||
στον | |||||
στη | |||||
στην | |||||
μα | |||||
αλλα | |||||
απο | |||||
για | |||||
προσ | |||||
με | |||||
σε | |||||
ωσ | |||||
παρα | |||||
αντι | |||||
κατα | |||||
μετα | |||||
θα | |||||
να | |||||
δε | |||||
δεν | |||||
μη | |||||
μην | |||||
επι | |||||
ενω | |||||
εαν | |||||
αν | |||||
τοτε | |||||
που | |||||
πωσ | |||||
ποιοσ | |||||
ποια | |||||
ποιο | |||||
ποιοι | |||||
ποιεσ | |||||
ποιων | |||||
ποιουσ | |||||
αυτοσ | |||||
αυτη | |||||
αυτο | |||||
αυτοι | |||||
αυτων | |||||
αυτουσ | |||||
αυτεσ | |||||
αυτα | |||||
εκεινοσ | |||||
εκεινη | |||||
εκεινο | |||||
εκεινοι | |||||
εκεινεσ | |||||
εκεινα | |||||
εκεινων | |||||
εκεινουσ | |||||
οπωσ | |||||
ομωσ | |||||
ισωσ | |||||
οσο | |||||
οτι |
# Licensed to the Apache Software Foundation (ASF) under one or more | |||||
# contributor license agreements. See the NOTICE file distributed with | |||||
# this work for additional information regarding copyright ownership. | |||||
# The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
# (the "License"); you may not use this file except in compliance with | |||||
# the License. You may obtain a copy of the License at | |||||
# | |||||
# http://www.apache.org/licenses/LICENSE-2.0 | |||||
# | |||||
# Unless required by applicable law or agreed to in writing, software | |||||
# distributed under the License is distributed on an "AS IS" BASIS, | |||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
# See the License for the specific language governing permissions and | |||||
# limitations under the License. | |||||
# a couple of test stopwords to test that the words are really being | |||||
# configured from this file: | |||||
stopworda | |||||
stopwordb | |||||
# Standard english stop words taken from Lucene's StopAnalyzer | |||||
a | |||||
an | |||||
and | |||||
are | |||||
as | |||||
at | |||||
be | |||||
but | |||||
by | |||||
for | |||||
if | |||||
in | |||||
into | |||||
is | |||||
it | |||||
no | |||||
not | |||||
of | |||||
on | |||||
or | |||||
such | |||||
that | |||||
the | |||||
their | |||||
then | |||||
there | |||||
these | |||||
they | |||||
this | |||||
to | |||||
was | |||||
will | |||||
with |
| From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt | |||||
| This file is distributed under the BSD License. | |||||
| See http://snowball.tartarus.org/license.php | |||||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| - Encoding was converted to UTF-8. | |||||
| - This notice was added. | |||||
| | |||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| A Spanish stop word list. Comments begin with vertical bar. Each stop | |||||
| word is at the start of a line. | |||||
| The following is a ranked list (commonest to rarest) of stopwords | |||||
| deriving from a large sample of text. | |||||
| Extra words have been added at the end. | |||||
de | from, of | |||||
la | the, her | |||||
que | who, that | |||||
el | the | |||||
en | in | |||||
y | and | |||||
a | to | |||||
los | the, them | |||||
del | de + el | |||||
se | himself, from him etc | |||||
las | the, them | |||||
por | for, by, etc | |||||
un | a | |||||
para | for | |||||
con | with | |||||
no | no | |||||
una | a | |||||
su | his, her | |||||
al | a + el | |||||
| es from SER | |||||
lo | him | |||||
como | how | |||||
más | more | |||||
pero | pero | |||||
sus | su plural | |||||
le | to him, her | |||||
ya | already | |||||
o | or | |||||
| fue from SER | |||||
este | this | |||||
| ha from HABER | |||||
sí | himself etc | |||||
porque | because | |||||
esta | this | |||||
| son from SER | |||||
entre | between | |||||
| está from ESTAR | |||||
cuando | when | |||||
muy | very | |||||
sin | without | |||||
sobre | on | |||||
| ser from SER | |||||
| tiene from TENER | |||||
también | also | |||||
me | me | |||||
hasta | until | |||||
hay | there is/are | |||||
donde | where | |||||
| han from HABER | |||||
quien | whom, that | |||||
| están from ESTAR | |||||
| estado from ESTAR | |||||
desde | from | |||||
todo | all | |||||
nos | us | |||||
durante | during | |||||
| estados from ESTAR | |||||
todos | all | |||||
uno | a | |||||
les | to them | |||||
ni | nor | |||||
contra | against | |||||
otros | other | |||||
| fueron from SER | |||||
ese | that | |||||
eso | that | |||||
| había from HABER | |||||
ante | before | |||||
ellos | they | |||||
e | and (variant of y) | |||||
esto | this | |||||
mí | me | |||||
antes | before | |||||
algunos | some | |||||
qué | what? | |||||
unos | a | |||||
yo | I | |||||
otro | other | |||||
otras | other | |||||
otra | other | |||||
él | he | |||||
tanto | so much, many | |||||
esa | that | |||||
estos | these | |||||
mucho | much, many | |||||
quienes | who | |||||
nada | nothing | |||||
muchos | many | |||||
cual | who | |||||
| sea from SER | |||||
poco | few | |||||
ella | she | |||||
estar | to be | |||||
| haber from HABER | |||||
estas | these | |||||
| estaba from ESTAR | |||||
| estamos from ESTAR | |||||
algunas | some | |||||
algo | something | |||||
nosotros | we | |||||
| other forms | |||||
mi | me | |||||
mis | mi plural | |||||
tú | thou | |||||
te | thee | |||||
ti | thee | |||||
tu | thy | |||||
tus | tu plural | |||||
ellas | they | |||||
nosotras | we | |||||
vosotros | you | |||||
vosotras | you | |||||
os | you | |||||
mío | mine | |||||
mía | | |||||
míos | | |||||
mías | | |||||
tuyo | thine | |||||
tuya | | |||||
tuyos | | |||||
tuyas | | |||||
suyo | his, hers, theirs | |||||
suya | | |||||
suyos | | |||||
suyas | | |||||
nuestro | ours | |||||
nuestra | | |||||
nuestros | | |||||
nuestras | | |||||
vuestro | yours | |||||
vuestra | | |||||
vuestros | | |||||
vuestras | | |||||
esos | those | |||||
esas | those | |||||
| forms of estar, to be (not including the infinitive): | |||||
estoy | |||||
estás | |||||
está | |||||
estamos | |||||
estáis | |||||
están | |||||
esté | |||||
estés | |||||
estemos | |||||
estéis | |||||
estén | |||||
estaré | |||||
estarás | |||||
estará | |||||
estaremos | |||||
estaréis | |||||
estarán | |||||
estaría | |||||
estarías | |||||
estaríamos | |||||
estaríais | |||||
estarían | |||||
estaba | |||||
estabas | |||||
estábamos | |||||
estabais | |||||
estaban | |||||
estuve | |||||
estuviste | |||||
estuvo | |||||
estuvimos | |||||
estuvisteis | |||||
estuvieron | |||||
estuviera | |||||
estuvieras | |||||
estuviéramos | |||||
estuvierais | |||||
estuvieran | |||||
estuviese | |||||
estuvieses | |||||
estuviésemos | |||||
estuvieseis | |||||
estuviesen | |||||
estando | |||||
estado | |||||
estada | |||||
estados | |||||
estadas | |||||
estad | |||||
| forms of haber, to have (not including the infinitive): | |||||
he | |||||
has | |||||
ha | |||||
hemos | |||||
habéis | |||||
han | |||||
haya | |||||
hayas | |||||
hayamos | |||||
hayáis | |||||
hayan | |||||
habré | |||||
habrás | |||||
habrá | |||||
habremos | |||||
habréis | |||||
habrán | |||||
habría | |||||
habrías | |||||
habríamos | |||||
habríais | |||||
habrían | |||||
había | |||||
habías | |||||
habíamos | |||||
habíais | |||||
habían | |||||
hube | |||||
hubiste | |||||
hubo | |||||
hubimos | |||||
hubisteis | |||||
hubieron | |||||
hubiera | |||||
hubieras | |||||
hubiéramos | |||||
hubierais | |||||
hubieran | |||||
hubiese | |||||
hubieses | |||||
hubiésemos | |||||
hubieseis | |||||
hubiesen | |||||
habiendo | |||||
habido | |||||
habida | |||||
habidos | |||||
habidas | |||||
| forms of ser, to be (not including the infinitive): | |||||
soy | |||||
eres | |||||
es | |||||
somos | |||||
sois | |||||
son | |||||
sea | |||||
seas | |||||
seamos | |||||
seáis | |||||
sean | |||||
seré | |||||
serás | |||||
será | |||||
seremos | |||||
seréis | |||||
serán | |||||
sería | |||||
serías | |||||
seríamos | |||||
seríais | |||||
serían | |||||
era | |||||
eras | |||||
éramos | |||||
erais | |||||
eran | |||||
fui | |||||
fuiste | |||||
fue | |||||
fuimos | |||||
fuisteis | |||||
fueron | |||||
fuera | |||||
fueras | |||||
fuéramos | |||||
fuerais | |||||
fueran | |||||
fuese | |||||
fueses | |||||
fuésemos | |||||
fueseis | |||||
fuesen | |||||
siendo | |||||
sido | |||||
| sed also means 'thirst' | |||||
| forms of tener, to have (not including the infinitive): | |||||
tengo | |||||
tienes | |||||
tiene | |||||
tenemos | |||||
tenéis | |||||
tienen | |||||
tenga | |||||
tengas | |||||
tengamos | |||||
tengáis | |||||
tengan | |||||
tendré | |||||
tendrás | |||||
tendrá | |||||
tendremos | |||||
tendréis | |||||
tendrán | |||||
tendría | |||||
tendrías | |||||
tendríamos | |||||
tendríais | |||||
tendrían | |||||
tenía | |||||
tenías | |||||
teníamos | |||||
teníais | |||||
tenían | |||||
tuve | |||||
tuviste | |||||
tuvo | |||||
tuvimos | |||||
tuvisteis | |||||
tuvieron | |||||
tuviera | |||||
tuvieras | |||||
tuviéramos | |||||
tuvierais | |||||
tuvieran | |||||
tuviese | |||||
tuvieses | |||||
tuviésemos | |||||
tuvieseis | |||||
tuviesen | |||||
teniendo | |||||
tenido | |||||
tenida | |||||
tenidos | |||||
tenidas | |||||
tened | |||||
# example set of basque stopwords | |||||
al | |||||
anitz | |||||
arabera | |||||
asko | |||||
baina | |||||
bat | |||||
batean | |||||
batek | |||||
bati | |||||
batzuei | |||||
batzuek | |||||
batzuetan | |||||
batzuk | |||||
bera | |||||
beraiek | |||||
berau | |||||
berauek | |||||
bere | |||||
berori | |||||
beroriek | |||||
beste | |||||
bezala | |||||
da | |||||
dago | |||||
dira | |||||
ditu | |||||
du | |||||
dute | |||||
edo | |||||
egin | |||||
ere | |||||
eta | |||||
eurak | |||||
ez | |||||
gainera | |||||
gu | |||||
gutxi | |||||
guzti | |||||
haiei | |||||
haiek | |||||
haietan | |||||
hainbeste | |||||
hala | |||||
han | |||||
handik | |||||
hango | |||||
hara | |||||
hari | |||||
hark | |||||
hartan | |||||
hau | |||||
hauei | |||||
hauek | |||||
hauetan | |||||
hemen | |||||
hemendik | |||||
hemengo | |||||
hi | |||||
hona | |||||
honek | |||||
honela | |||||
honetan | |||||
honi | |||||
hor | |||||
hori | |||||
horiei | |||||
horiek | |||||
horietan | |||||
horko | |||||
horra | |||||
horrek | |||||
horrela | |||||
horretan | |||||
horri | |||||
hortik | |||||
hura | |||||
izan | |||||
ni | |||||
noiz | |||||
nola | |||||
non | |||||
nondik | |||||
nongo | |||||
nor | |||||
nora | |||||
ze | |||||
zein | |||||
zen | |||||
zenbait | |||||
zenbat | |||||
zer | |||||
zergatik | |||||
ziren | |||||
zituen | |||||
zu | |||||
zuek | |||||
zuen | |||||
zuten |
# This file was created by Jacques Savoy and is distributed under the BSD license. | |||||
# See http://members.unine.ch/jacques.savoy/clef/index.html. | |||||
# Also see http://www.opensource.org/licenses/bsd-license.html | |||||
# Note: by default this file is used after normalization, so when adding entries | |||||
# to this file, use the arabic 'ي' instead of 'ی' | |||||
انان | |||||
نداشته | |||||
سراسر | |||||
خياه | |||||
ايشان | |||||
وي | |||||
تاكنون | |||||
بيشتري | |||||
دوم | |||||
پس | |||||
ناشي | |||||
وگو | |||||
يا | |||||
داشتند | |||||
سپس | |||||
هنگام | |||||
هرگز | |||||
پنج | |||||
نشان | |||||
امسال | |||||
ديگر | |||||
گروهي | |||||
شدند | |||||
چطور | |||||
ده | |||||
و | |||||
دو | |||||
نخستين | |||||
ولي | |||||
چرا | |||||
چه | |||||
وسط | |||||
ه | |||||
كدام | |||||
قابل | |||||
يك | |||||
رفت | |||||
هفت | |||||
همچنين | |||||
در | |||||
هزار | |||||
بله | |||||
بلي | |||||
شايد | |||||
اما | |||||
شناسي | |||||
گرفته | |||||
دهد | |||||
داشته | |||||
دانست | |||||
داشتن | |||||
خواهيم | |||||
ميليارد | |||||
وقتيكه | |||||
امد | |||||
خواهد | |||||
جز | |||||
اورده | |||||
شده | |||||
بلكه | |||||
خدمات | |||||
شدن | |||||
برخي | |||||
نبود | |||||
بسياري | |||||
جلوگيري | |||||
حق | |||||
كردند | |||||
نوعي | |||||
بعري | |||||
نكرده | |||||
نظير | |||||
نبايد | |||||
بوده | |||||
بودن | |||||
داد | |||||
اورد | |||||
هست | |||||
جايي | |||||
شود | |||||
دنبال | |||||
داده | |||||
بايد | |||||
سابق | |||||
هيچ | |||||
همان | |||||
انجا | |||||
كمتر | |||||
كجاست | |||||
گردد | |||||
كسي | |||||
تر | |||||
مردم | |||||
تان | |||||
دادن | |||||
بودند | |||||
سري | |||||
جدا | |||||
ندارند | |||||
مگر | |||||
يكديگر | |||||
دارد | |||||
دهند | |||||
بنابراين | |||||
هنگامي | |||||
سمت | |||||
جا | |||||
انچه | |||||
خود | |||||
دادند | |||||
زياد | |||||
دارند | |||||
اثر | |||||
بدون | |||||
بهترين | |||||
بيشتر | |||||
البته | |||||
به | |||||
براساس | |||||
بيرون | |||||
كرد | |||||
بعضي | |||||
گرفت | |||||
توي | |||||
اي | |||||
ميليون | |||||
او | |||||
جريان | |||||
تول | |||||
بر | |||||
مانند | |||||
برابر | |||||
باشيم | |||||
مدتي | |||||
گويند | |||||
اكنون | |||||
تا | |||||
تنها | |||||
جديد | |||||
چند | |||||
بي | |||||
نشده | |||||
كردن | |||||
كردم | |||||
گويد | |||||
كرده | |||||
كنيم | |||||
نمي | |||||
نزد | |||||
روي | |||||
قصد | |||||
فقط | |||||
بالاي | |||||
ديگران | |||||
اين | |||||
ديروز | |||||
توسط | |||||
سوم | |||||
ايم | |||||
دانند | |||||
سوي | |||||
استفاده | |||||
شما | |||||
كنار | |||||
داريم | |||||
ساخته | |||||
طور | |||||
امده | |||||
رفته | |||||
نخست | |||||
بيست | |||||
نزديك | |||||
طي | |||||
كنيد | |||||
از | |||||
انها | |||||
تمامي | |||||
داشت | |||||
يكي | |||||
طريق | |||||
اش | |||||
چيست | |||||
روب | |||||
نمايد | |||||
گفت | |||||
چندين | |||||
چيزي | |||||
تواند | |||||
ام | |||||
ايا | |||||
با | |||||
ان | |||||
ايد | |||||
ترين | |||||
اينكه | |||||
ديگري | |||||
راه | |||||
هايي | |||||
بروز | |||||
همچنان | |||||
پاعين | |||||
كس | |||||
حدود | |||||
مختلف | |||||
مقابل | |||||
چيز | |||||
گيرد | |||||
ندارد | |||||
ضد | |||||
همچون | |||||
سازي | |||||
شان | |||||
مورد | |||||
باره | |||||
مرسي | |||||
خويش | |||||
برخوردار | |||||
چون | |||||
خارج | |||||
شش | |||||
هنوز | |||||
تحت | |||||
ضمن | |||||
هستيم | |||||
گفته | |||||
فكر | |||||
بسيار | |||||
پيش | |||||
براي | |||||
روزهاي | |||||
انكه | |||||
نخواهد | |||||
بالا | |||||
كل | |||||
وقتي | |||||
كي | |||||
چنين | |||||
كه | |||||
گيري | |||||
نيست | |||||
است | |||||
كجا | |||||
كند | |||||
نيز | |||||
يابد | |||||
بندي | |||||
حتي | |||||
توانند | |||||
عقب | |||||
خواست | |||||
كنند | |||||
بين | |||||
تمام | |||||
همه | |||||
ما | |||||
باشند | |||||
مثل | |||||
شد | |||||
اري | |||||
باشد | |||||
اره | |||||
طبق | |||||
بعد | |||||
اگر | |||||
صورت | |||||
غير | |||||
جاي | |||||
بيش | |||||
ريزي | |||||
اند | |||||
زيرا | |||||
چگونه | |||||
بار | |||||
لطفا | |||||
مي | |||||
درباره | |||||
من | |||||
ديده | |||||
همين | |||||
گذاري | |||||
برداري | |||||
علت | |||||
گذاشته | |||||
هم | |||||
فوق | |||||
نه | |||||
ها | |||||
شوند | |||||
اباد | |||||
همواره | |||||
هر | |||||
اول | |||||
خواهند | |||||
چهار | |||||
نام | |||||
امروز | |||||
مان | |||||
هاي | |||||
قبل | |||||
كنم | |||||
سعي | |||||
تازه | |||||
را | |||||
هستند | |||||
زير | |||||
جلوي | |||||
عنوان | |||||
بود |
| From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt | |||||
| This file is distributed under the BSD License. | |||||
| See http://snowball.tartarus.org/license.php | |||||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| - Encoding was converted to UTF-8. | |||||
| - This notice was added. | |||||
| | |||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| forms of BE | |||||
olla | |||||
olen | |||||
olet | |||||
on | |||||
olemme | |||||
olette | |||||
ovat | |||||
ole | negative form | |||||
oli | |||||
olisi | |||||
olisit | |||||
olisin | |||||
olisimme | |||||
olisitte | |||||
olisivat | |||||
olit | |||||
olin | |||||
olimme | |||||
olitte | |||||
olivat | |||||
ollut | |||||
olleet | |||||
en | negation | |||||
et | |||||
ei | |||||
emme | |||||
ette | |||||
eivät | |||||
|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans | |||||
minä minun minut minua minussa minusta minuun minulla minulta minulle | I | |||||
sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you | |||||
hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she | |||||
me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we | |||||
te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you | |||||
he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they | |||||
tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this | |||||
tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that | |||||
se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it | |||||
nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these | |||||
nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those | |||||
ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they | |||||
kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who | |||||
ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) | |||||
mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what | |||||
mitkä | (pl) | |||||
joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which | |||||
jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) | |||||
| conjunctions | |||||
että | that | |||||
ja | and | |||||
jos | if | |||||
koska | because | |||||
kuin | than | |||||
mutta | but | |||||
niin | so | |||||
sekä | and | |||||
sillä | for | |||||
tai | or | |||||
vaan | but | |||||
vai | or | |||||
vaikka | although | |||||
| prepositions | |||||
kanssa | with | |||||
mukaan | according to | |||||
noin | about | |||||
poikki | across | |||||
yli | over, across | |||||
| other | |||||
kun | when | |||||
niin | so | |||||
nyt | now | |||||
itse | self | |||||
| From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt | |||||
| This file is distributed under the BSD License. | |||||
| See http://snowball.tartarus.org/license.php | |||||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| - Encoding was converted to UTF-8. | |||||
| - This notice was added. | |||||
| | |||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| A French stop word list. Comments begin with vertical bar. Each stop | |||||
| word is at the start of a line. | |||||
au | a + le | |||||
aux | a + les | |||||
avec | with | |||||
ce | this | |||||
ces | these | |||||
dans | with | |||||
de | of | |||||
des | de + les | |||||
du | de + le | |||||
elle | she | |||||
en | `of them' etc | |||||
et | and | |||||
eux | them | |||||
il | he | |||||
je | I | |||||
la | the | |||||
le | the | |||||
leur | their | |||||
lui | him | |||||
ma | my (fem) | |||||
mais | but | |||||
me | me | |||||
même | same; as in moi-même (myself) etc | |||||
mes | me (pl) | |||||
moi | me | |||||
mon | my (masc) | |||||
ne | not | |||||
nos | our (pl) | |||||
notre | our | |||||
nous | we | |||||
on | one | |||||
ou | where | |||||
par | by | |||||
pas | not | |||||
pour | for | |||||
qu | que before vowel | |||||
que | that | |||||
qui | who | |||||
sa | his, her (fem) | |||||
se | oneself | |||||
ses | his (pl) | |||||
son | his, her (masc) | |||||
sur | on | |||||
ta | thy (fem) | |||||
te | thee | |||||
tes | thy (pl) | |||||
toi | thee | |||||
ton | thy (masc) | |||||
tu | thou | |||||
un | a | |||||
une | a | |||||
vos | your (pl) | |||||
votre | your | |||||
vous | you | |||||
| single letter forms | |||||
c | c' | |||||
d | d' | |||||
j | j' | |||||
l | l' | |||||
à | to, at | |||||
m | m' | |||||
n | n' | |||||
s | s' | |||||
t | t' | |||||
y | there | |||||
| forms of être (not including the infinitive): | |||||
été | |||||
étée | |||||
étées | |||||
étés | |||||
étant | |||||
suis | |||||
es | |||||
est | |||||
sommes | |||||
êtes | |||||
sont | |||||
serai | |||||
seras | |||||
sera | |||||
serons | |||||
serez | |||||
seront | |||||
serais | |||||
serait | |||||
serions | |||||
seriez | |||||
seraient | |||||
étais | |||||
était | |||||
étions | |||||
étiez | |||||
étaient | |||||
fus | |||||
fut | |||||
fûmes | |||||
fûtes | |||||
furent | |||||
sois | |||||
soit | |||||
soyons | |||||
soyez | |||||
soient | |||||
fusse | |||||
fusses | |||||
fût | |||||
fussions | |||||
fussiez | |||||
fussent | |||||
| forms of avoir (not including the infinitive): | |||||
ayant | |||||
eu | |||||
eue | |||||
eues | |||||
eus | |||||
ai | |||||
as | |||||
avons | |||||
avez | |||||
ont | |||||
aurai | |||||
auras | |||||
aura | |||||
aurons | |||||
aurez | |||||
auront | |||||
aurais | |||||
aurait | |||||
aurions | |||||
auriez | |||||
auraient | |||||
avais | |||||
avait | |||||
avions | |||||
aviez | |||||
avaient | |||||
eut | |||||
eûmes | |||||
eûtes | |||||
eurent | |||||
aie | |||||
aies | |||||
ait | |||||
ayons | |||||
ayez | |||||
aient | |||||
eusse | |||||
eusses | |||||
eût | |||||
eussions | |||||
eussiez | |||||
eussent | |||||
| Later additions (from Jean-Christophe Deschamps) | |||||
ceci | this | |||||
cela | that | |||||
celà | that | |||||
cet | this | |||||
cette | this | |||||
ici | here | |||||
ils | they | |||||
les | the (pl) | |||||
leurs | their (pl) | |||||
quel | which | |||||
quels | which | |||||
quelle | which | |||||
quelles | which | |||||
sans | without | |||||
soi | oneself | |||||
a | |||||
ach | |||||
ag | |||||
agus | |||||
an | |||||
aon | |||||
ar | |||||
arna | |||||
as | |||||
b' | |||||
ba | |||||
beirt | |||||
bhúr | |||||
caoga | |||||
ceathair | |||||
ceathrar | |||||
chomh | |||||
chtó | |||||
chuig | |||||
chun | |||||
cois | |||||
céad | |||||
cúig | |||||
cúigear | |||||
d' | |||||
daichead | |||||
dar | |||||
de | |||||
deich | |||||
deichniúr | |||||
den | |||||
dhá | |||||
do | |||||
don | |||||
dtí | |||||
dá | |||||
dár | |||||
dó | |||||
faoi | |||||
faoin | |||||
faoina | |||||
faoinár | |||||
fara | |||||
fiche | |||||
gach | |||||
gan | |||||
go | |||||
gur | |||||
haon | |||||
hocht | |||||
i | |||||
iad | |||||
idir | |||||
in | |||||
ina | |||||
ins | |||||
inár | |||||
is | |||||
le | |||||
leis | |||||
lena | |||||
lenár | |||||
m' | |||||
mar | |||||
mo | |||||
mé | |||||
na | |||||
nach | |||||
naoi | |||||
naonúr | |||||
ná | |||||
ní | |||||
níor | |||||
nó | |||||
nócha | |||||
ocht | |||||
ochtar | |||||
os | |||||
roimh | |||||
sa | |||||
seacht | |||||
seachtar | |||||
seachtó | |||||
seasca | |||||
seisear | |||||
siad | |||||
sibh | |||||
sinn | |||||
sna | |||||
sé | |||||
sí | |||||
tar | |||||
thar | |||||
thú | |||||
triúr | |||||
trí | |||||
trína | |||||
trínár | |||||
tríocha | |||||
tú | |||||
um | |||||
ár | |||||
é | |||||
éis | |||||
í | |||||
ó | |||||
ón | |||||
óna | |||||
ónár |
# Galician stopwords | |||||
a | |||||
aínda | |||||
alí | |||||
aquel | |||||
aquela | |||||
aquelas | |||||
aqueles | |||||
aquilo | |||||
aquí | |||||
ao | |||||
aos | |||||
as | |||||
así | |||||
á | |||||
ben | |||||
cando | |||||
che | |||||
co | |||||
coa | |||||
comigo | |||||
con | |||||
connosco | |||||
contigo | |||||
convosco | |||||
coas | |||||
cos | |||||
cun | |||||
cuns | |||||
cunha | |||||
cunhas | |||||
da | |||||
dalgunha | |||||
dalgunhas | |||||
dalgún | |||||
dalgúns | |||||
das | |||||
de | |||||
del | |||||
dela | |||||
delas | |||||
deles | |||||
desde | |||||
deste | |||||
do | |||||
dos | |||||
dun | |||||
duns | |||||
dunha | |||||
dunhas | |||||
e | |||||
el | |||||
ela | |||||
elas | |||||
eles | |||||
en | |||||
era | |||||
eran | |||||
esa | |||||
esas | |||||
ese | |||||
eses | |||||
esta | |||||
estar | |||||
estaba | |||||
está | |||||
están | |||||
este | |||||
estes | |||||
estiven | |||||
estou | |||||
eu | |||||
é | |||||
facer | |||||
foi | |||||
foron | |||||
fun | |||||
había | |||||
hai | |||||
iso | |||||
isto | |||||
la | |||||
las | |||||
lle | |||||
lles | |||||
lo | |||||
los | |||||
mais | |||||
me | |||||
meu | |||||
meus | |||||
min | |||||
miña | |||||
miñas | |||||
moi | |||||
na | |||||
nas | |||||
neste | |||||
nin | |||||
no | |||||
non | |||||
nos | |||||
nosa | |||||
nosas | |||||
noso | |||||
nosos | |||||
nós | |||||
nun | |||||
nunha | |||||
nuns | |||||
nunhas | |||||
o | |||||
os | |||||
ou | |||||
ó | |||||
ós | |||||
para | |||||
pero | |||||
pode | |||||
pois | |||||
pola | |||||
polas | |||||
polo | |||||
polos | |||||
por | |||||
que | |||||
se | |||||
senón | |||||
ser | |||||
seu | |||||
seus | |||||
sexa | |||||
sido | |||||
sobre | |||||
súa | |||||
súas | |||||
tamén | |||||
tan | |||||
te | |||||
ten | |||||
teñen | |||||
teño | |||||
ter | |||||
teu | |||||
teus | |||||
ti | |||||
tido | |||||
tiña | |||||
tiven | |||||
túa | |||||
túas | |||||
un | |||||
unha | |||||
unhas | |||||
uns | |||||
vos | |||||
vosa | |||||
vosas | |||||
voso | |||||
vosos | |||||
vós |
# Also see http://www.opensource.org/licenses/bsd-license.html | |||||
# See http://members.unine.ch/jacques.savoy/clef/index.html. | |||||
# This file was created by Jacques Savoy and is distributed under the BSD license. | |||||
# Note: by default this file also contains forms normalized by HindiNormalizer | |||||
# for spelling variation (see section below), such that it can be used whether or | |||||
# not you enable that feature. When adding additional entries to this list, | |||||
# please add the normalized form as well. | |||||
अंदर | |||||
अत | |||||
अपना | |||||
अपनी | |||||
अपने | |||||
अभी | |||||
आदि | |||||
आप | |||||
इत्यादि | |||||
इन | |||||
इनका | |||||
इन्हीं | |||||
इन्हें | |||||
इन्हों | |||||
इस | |||||
इसका | |||||
इसकी | |||||
इसके | |||||
इसमें | |||||
इसी | |||||
इसे | |||||
उन | |||||
उनका | |||||
उनकी | |||||
उनके | |||||
उनको | |||||
उन्हीं | |||||
उन्हें | |||||
उन्हों | |||||
उस | |||||
उसके | |||||
उसी | |||||
उसे | |||||
एक | |||||
एवं | |||||
एस | |||||
ऐसे | |||||
और | |||||
कई | |||||
कर | |||||
करता | |||||
करते | |||||
करना | |||||
करने | |||||
करें | |||||
कहते | |||||
कहा | |||||
का | |||||
काफ़ी | |||||
कि | |||||
कितना | |||||
किन्हें | |||||
किन्हों | |||||
किया | |||||
किर | |||||
किस | |||||
किसी | |||||
किसे | |||||
की | |||||
कुछ | |||||
कुल | |||||
के | |||||
को | |||||
कोई | |||||
कौन | |||||
कौनसा | |||||
गया | |||||
घर | |||||
जब | |||||
जहाँ | |||||
जा | |||||
जितना | |||||
जिन | |||||
जिन्हें | |||||
जिन्हों | |||||
जिस | |||||
जिसे | |||||
जीधर | |||||
जैसा | |||||
जैसे | |||||
जो | |||||
तक | |||||
तब | |||||
तरह | |||||
तिन | |||||
तिन्हें | |||||
तिन्हों | |||||
तिस | |||||
तिसे | |||||
तो | |||||
था | |||||
थी | |||||
थे | |||||
दबारा | |||||
दिया | |||||
दुसरा | |||||
दूसरे | |||||
दो | |||||
द्वारा | |||||
न | |||||
नहीं | |||||
ना | |||||
निहायत | |||||
नीचे | |||||
ने | |||||
पर | |||||
पर | |||||
पहले | |||||
पूरा | |||||
पे | |||||
फिर | |||||
बनी | |||||
बही | |||||
बहुत | |||||
बाद | |||||
बाला | |||||
बिलकुल | |||||
भी | |||||
भीतर | |||||
मगर | |||||
मानो | |||||
मे | |||||
में | |||||
यदि | |||||
यह | |||||
यहाँ | |||||
यही | |||||
या | |||||
यिह | |||||
ये | |||||
रखें | |||||
रहा | |||||
रहे | |||||
ऱ्वासा | |||||
लिए | |||||
लिये | |||||
लेकिन | |||||
व | |||||
वर्ग | |||||
वह | |||||
वह | |||||
वहाँ | |||||
वहीं | |||||
वाले | |||||
वुह | |||||
वे | |||||
वग़ैरह | |||||
संग | |||||
सकता | |||||
सकते | |||||
सबसे | |||||
सभी | |||||
साथ | |||||
साबुत | |||||
साभ | |||||
सारा | |||||
से | |||||
सो | |||||
ही | |||||
हुआ | |||||
हुई | |||||
हुए | |||||
है | |||||
हैं | |||||
हो | |||||
होता | |||||
होती | |||||
होते | |||||
होना | |||||
होने | |||||
# additional normalized forms of the above | |||||
अपनि | |||||
जेसे | |||||
होति | |||||
सभि | |||||
तिंहों | |||||
इंहों | |||||
दवारा | |||||
इसि | |||||
किंहें | |||||
थि | |||||
उंहों | |||||
ओर | |||||
जिंहें | |||||
वहिं | |||||
अभि | |||||
बनि | |||||
हि | |||||
उंहिं | |||||
उंहें | |||||
हें | |||||
वगेरह | |||||
एसे | |||||
रवासा | |||||
कोन | |||||
निचे | |||||
काफि | |||||
उसि | |||||
पुरा | |||||
भितर | |||||
हे | |||||
बहि | |||||
वहां | |||||
कोइ | |||||
यहां | |||||
जिंहों | |||||
तिंहें | |||||
किसि | |||||
कइ | |||||
यहि | |||||
इंहिं | |||||
जिधर | |||||
इंहें | |||||
अदि | |||||
इतयादि | |||||
हुइ | |||||
कोनसा | |||||
इसकि | |||||
दुसरे | |||||
जहां | |||||
अप | |||||
किंहों | |||||
उनकि | |||||
भि | |||||
वरग | |||||
हुअ | |||||
जेसा | |||||
नहिं |
| From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt | |||||
| This file is distributed under the BSD License. | |||||
| See http://snowball.tartarus.org/license.php | |||||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| - Encoding was converted to UTF-8. | |||||
| - This notice was added. | |||||
| | |||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| Hungarian stop word list | |||||
| prepared by Anna Tordai | |||||
a | |||||
ahogy | |||||
ahol | |||||
aki | |||||
akik | |||||
akkor | |||||
alatt | |||||
által | |||||
általában | |||||
amely | |||||
amelyek | |||||
amelyekben | |||||
amelyeket | |||||
amelyet | |||||
amelynek | |||||
ami | |||||
amit | |||||
amolyan | |||||
amíg | |||||
amikor | |||||
át | |||||
abban | |||||
ahhoz | |||||
annak | |||||
arra | |||||
arról | |||||
az | |||||
azok | |||||
azon | |||||
azt | |||||
azzal | |||||
azért | |||||
aztán | |||||
azután | |||||
azonban | |||||
bár | |||||
be | |||||
belül | |||||
benne | |||||
cikk | |||||
cikkek | |||||
cikkeket | |||||
csak | |||||
de | |||||
e | |||||
eddig | |||||
egész | |||||
egy | |||||
egyes | |||||
egyetlen | |||||
egyéb | |||||
egyik | |||||
egyre | |||||
ekkor | |||||
el | |||||
elég | |||||
ellen | |||||
elő | |||||
először | |||||
előtt | |||||
első | |||||
én | |||||
éppen | |||||
ebben | |||||
ehhez | |||||
emilyen | |||||
ennek | |||||
erre | |||||
ez | |||||
ezt | |||||
ezek | |||||
ezen | |||||
ezzel | |||||
ezért | |||||
és | |||||
fel | |||||
felé | |||||
hanem | |||||
hiszen | |||||
hogy | |||||
hogyan | |||||
igen | |||||
így | |||||
illetve | |||||
ill. | |||||
ill | |||||
ilyen | |||||
ilyenkor | |||||
ison | |||||
ismét | |||||
itt | |||||
jó | |||||
jól | |||||
jobban | |||||
kell | |||||
kellett | |||||
keresztül | |||||
keressünk | |||||
ki | |||||
kívül | |||||
között | |||||
közül | |||||
legalább | |||||
lehet | |||||
lehetett | |||||
legyen | |||||
lenne | |||||
lenni | |||||
lesz | |||||
lett | |||||
maga | |||||
magát | |||||
majd | |||||
majd | |||||
már | |||||
más | |||||
másik | |||||
meg | |||||
még | |||||
mellett | |||||
mert | |||||
mely | |||||
melyek | |||||
mi | |||||
mit | |||||
míg | |||||
miért | |||||
milyen | |||||
mikor | |||||
minden | |||||
mindent | |||||
mindenki | |||||
mindig | |||||
mint | |||||
mintha | |||||
mivel | |||||
most | |||||
nagy | |||||
nagyobb | |||||
nagyon | |||||
ne | |||||
néha | |||||
nekem | |||||
neki | |||||
nem | |||||
néhány | |||||
nélkül | |||||
nincs | |||||
olyan | |||||
ott | |||||
össze | |||||
ő | |||||
ők | |||||
őket | |||||
pedig | |||||
persze | |||||
rá | |||||
s | |||||
saját | |||||
sem | |||||
semmi | |||||
sok | |||||
sokat | |||||
sokkal | |||||
számára | |||||
szemben | |||||
szerint | |||||
szinte | |||||
talán | |||||
tehát | |||||
teljes | |||||
tovább | |||||
továbbá | |||||
több | |||||
úgy | |||||
ugyanis | |||||
új | |||||
újabb | |||||
újra | |||||
után | |||||
utána | |||||
utolsó | |||||
vagy | |||||
vagyis | |||||
valaki | |||||
valami | |||||
valamint | |||||
való | |||||
vagyok | |||||
van | |||||
vannak | |||||
volt | |||||
voltam | |||||
voltak | |||||
voltunk | |||||
vissza | |||||
vele | |||||
viszont | |||||
volna |
# example set of Armenian stopwords. | |||||
այդ | |||||
այլ | |||||
այն | |||||
այս | |||||
դու | |||||
դուք | |||||
եմ | |||||
են | |||||
ենք | |||||
ես | |||||
եք | |||||
է | |||||
էի | |||||
էին | |||||
էինք | |||||
էիր | |||||
էիք | |||||
էր | |||||
ըստ | |||||
թ | |||||
ի | |||||
ին | |||||
իսկ | |||||
իր | |||||
կամ | |||||
համար | |||||
հետ | |||||
հետո | |||||
մենք | |||||
մեջ | |||||
մի | |||||
ն | |||||
նա | |||||
նաև | |||||
նրա | |||||
նրանք | |||||
որ | |||||
որը | |||||
որոնք | |||||
որպես | |||||
ու | |||||
ում | |||||
պիտի | |||||
վրա | |||||
և |
# from appendix D of: A Study of Stemming Effects on Information | |||||
# Retrieval in Bahasa Indonesia | |||||
ada | |||||
adanya | |||||
adalah | |||||
adapun | |||||
agak | |||||
agaknya | |||||
agar | |||||
akan | |||||
akankah | |||||
akhirnya | |||||
aku | |||||
akulah | |||||
amat | |||||
amatlah | |||||
anda | |||||
andalah | |||||
antar | |||||
diantaranya | |||||
antara | |||||
antaranya | |||||
diantara | |||||
apa | |||||
apaan | |||||
mengapa | |||||
apabila | |||||
apakah | |||||
apalagi | |||||
apatah | |||||
atau | |||||
ataukah | |||||
ataupun | |||||
bagai | |||||
bagaikan | |||||
sebagai | |||||
sebagainya | |||||
bagaimana | |||||
bagaimanapun | |||||
sebagaimana | |||||
bagaimanakah | |||||
bagi | |||||
bahkan | |||||
bahwa | |||||
bahwasanya | |||||
sebaliknya | |||||
banyak | |||||
sebanyak | |||||
beberapa | |||||
seberapa | |||||
begini | |||||
beginian | |||||
beginikah | |||||
beginilah | |||||
sebegini | |||||
begitu | |||||
begitukah | |||||
begitulah | |||||
begitupun | |||||
sebegitu | |||||
belum | |||||
belumlah | |||||
sebelum | |||||
sebelumnya | |||||
sebenarnya | |||||
berapa | |||||
berapakah | |||||
berapalah | |||||
berapapun | |||||
betulkah | |||||
sebetulnya | |||||
biasa | |||||
biasanya | |||||
bila | |||||
bilakah | |||||
bisa | |||||
bisakah | |||||
sebisanya | |||||
boleh | |||||
bolehkah | |||||
bolehlah | |||||
buat | |||||
bukan | |||||
bukankah | |||||
bukanlah | |||||
bukannya | |||||
cuma | |||||
percuma | |||||
dahulu | |||||
dalam | |||||
dan | |||||
dapat | |||||
dari | |||||
daripada | |||||
dekat | |||||
demi | |||||
demikian | |||||
demikianlah | |||||
sedemikian | |||||
dengan | |||||
depan | |||||
di | |||||
dia | |||||
dialah | |||||
dini | |||||
diri | |||||
dirinya | |||||
terdiri | |||||
dong | |||||
dulu | |||||
enggak | |||||
enggaknya | |||||
entah | |||||
entahlah | |||||
terhadap | |||||
terhadapnya | |||||
hal | |||||
hampir | |||||
hanya | |||||
hanyalah | |||||
harus | |||||
haruslah | |||||
harusnya | |||||
seharusnya | |||||
hendak | |||||
hendaklah | |||||
hendaknya | |||||
hingga | |||||
sehingga | |||||
ia | |||||
ialah | |||||
ibarat | |||||
ingin | |||||
inginkah | |||||
inginkan | |||||
ini | |||||
inikah | |||||
inilah | |||||
itu | |||||
itukah | |||||
itulah | |||||
jangan | |||||
jangankan | |||||
janganlah | |||||
jika | |||||
jikalau | |||||
juga | |||||
justru | |||||
kala | |||||
kalau | |||||
kalaulah | |||||
kalaupun | |||||
kalian | |||||
kami | |||||
kamilah | |||||
kamu | |||||
kamulah | |||||
kan | |||||
kapan | |||||
kapankah | |||||
kapanpun | |||||
dikarenakan | |||||
karena | |||||
karenanya | |||||
ke | |||||
kecil | |||||
kemudian | |||||
kenapa | |||||
kepada | |||||
kepadanya | |||||
ketika | |||||
seketika | |||||
khususnya | |||||
kini | |||||
kinilah | |||||
kiranya | |||||
sekiranya | |||||
kita | |||||
kitalah | |||||
kok | |||||
lagi | |||||
lagian | |||||
selagi | |||||
lah | |||||
lain | |||||
lainnya | |||||
melainkan | |||||
selaku | |||||
lalu | |||||
melalui | |||||
terlalu | |||||
lama | |||||
lamanya | |||||
selama | |||||
selama | |||||
selamanya | |||||
lebih | |||||
terlebih | |||||
bermacam | |||||
macam | |||||
semacam | |||||
maka | |||||
makanya | |||||
makin | |||||
malah | |||||
malahan | |||||
mampu | |||||
mampukah | |||||
mana | |||||
manakala | |||||
manalagi | |||||
masih | |||||
masihkah | |||||
semasih | |||||
masing | |||||
mau | |||||
maupun | |||||
semaunya | |||||
memang | |||||
mereka | |||||
merekalah | |||||
meski | |||||
meskipun | |||||
semula | |||||
mungkin | |||||
mungkinkah | |||||
nah | |||||
namun | |||||
nanti | |||||
nantinya | |||||
nyaris | |||||
oleh | |||||
olehnya | |||||
seorang | |||||
seseorang | |||||
pada | |||||
padanya | |||||
padahal | |||||
paling | |||||
sepanjang | |||||
pantas | |||||
sepantasnya | |||||
sepantasnyalah | |||||
para | |||||
pasti | |||||
pastilah | |||||
per | |||||
pernah | |||||
pula | |||||
pun | |||||
merupakan | |||||
rupanya | |||||
serupa | |||||
saat | |||||
saatnya | |||||
sesaat | |||||
saja | |||||
sajalah | |||||
saling | |||||
bersama | |||||
sama | |||||
sesama | |||||
sambil | |||||
sampai | |||||
sana | |||||
sangat | |||||
sangatlah | |||||
saya | |||||
sayalah | |||||
se | |||||
sebab | |||||
sebabnya | |||||
sebuah | |||||
tersebut | |||||
tersebutlah | |||||
sedang | |||||
sedangkan | |||||
sedikit | |||||
sedikitnya | |||||
segala | |||||
segalanya | |||||
segera | |||||
sesegera | |||||
sejak | |||||
sejenak | |||||
sekali | |||||
sekalian | |||||
sekalipun | |||||
sesekali | |||||
sekaligus | |||||
sekarang | |||||
sekarang | |||||
sekitar | |||||
sekitarnya | |||||
sela | |||||
selain | |||||
selalu | |||||
seluruh | |||||
seluruhnya | |||||
semakin | |||||
sementara | |||||
sempat | |||||
semua | |||||
semuanya | |||||
sendiri | |||||
sendirinya | |||||
seolah | |||||
seperti | |||||
sepertinya | |||||
sering | |||||
seringnya | |||||
serta | |||||
siapa | |||||
siapakah | |||||
siapapun | |||||
disini | |||||
disinilah | |||||
sini | |||||
sinilah | |||||
sesuatu | |||||
sesuatunya | |||||
suatu | |||||
sesudah | |||||
sesudahnya | |||||
sudah | |||||
sudahkah | |||||
sudahlah | |||||
supaya | |||||
tadi | |||||
tadinya | |||||
tak | |||||
tanpa | |||||
setelah | |||||
telah | |||||
tentang | |||||
tentu | |||||
tentulah | |||||
tentunya | |||||
tertentu | |||||
seterusnya | |||||
tapi | |||||
tetapi | |||||
setiap | |||||
tiap | |||||
setidaknya | |||||
tidak | |||||
tidakkah | |||||
tidaklah | |||||
toh | |||||
waduh | |||||
wah | |||||
wahai | |||||
sewaktu | |||||
walau | |||||
walaupun | |||||
wong | |||||
yaitu | |||||
yakni | |||||
yang |
| From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt | |||||
| This file is distributed under the BSD License. | |||||
| See http://snowball.tartarus.org/license.php | |||||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| - Encoding was converted to UTF-8. | |||||
| - This notice was added. | |||||
| | |||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| An Italian stop word list. Comments begin with vertical bar. Each stop | |||||
| word is at the start of a line. | |||||
ad | a (to) before vowel | |||||
al | a + il | |||||
allo | a + lo | |||||
ai | a + i | |||||
agli | a + gli | |||||
all | a + l' | |||||
agl | a + gl' | |||||
alla | a + la | |||||
alle | a + le | |||||
con | with | |||||
col | con + il | |||||
coi | con + i (forms collo, cogli etc are now very rare) | |||||
da | from | |||||
dal | da + il | |||||
dallo | da + lo | |||||
dai | da + i | |||||
dagli | da + gli | |||||
dall | da + l' | |||||
dagl | da + gl' | |||||
dalla | da + la | |||||
dalle | da + le | |||||
di | of | |||||
del | di + il | |||||
dello | di + lo | |||||
dei | di + i | |||||
degli | di + gli | |||||
dell | di + l' | |||||
degl | di + gl' | |||||
della | di + la | |||||
delle | di + le | |||||
in | in | |||||
nel | in + il | |||||
nello | in + lo | |||||
nei | in + i | |||||
negli | in + gli | |||||
nell | in + l' | |||||
negl | in + gl' | |||||
nella | in + la | |||||
nelle | in + le | |||||
su | on | |||||
sul | su + il | |||||
sullo | su + lo | |||||
sui | su + i | |||||
sugli | su + gli | |||||
sull | su + l' | |||||
sugl | su + gl' | |||||
sulla | su + la | |||||
sulle | su + le | |||||
per | through, by | |||||
tra | among | |||||
contro | against | |||||
io | I | |||||
tu | thou | |||||
lui | he | |||||
lei | she | |||||
noi | we | |||||
voi | you | |||||
loro | they | |||||
mio | my | |||||
mia | | |||||
miei | | |||||
mie | | |||||
tuo | | |||||
tua | | |||||
tuoi | thy | |||||
tue | | |||||
suo | | |||||
sua | | |||||
suoi | his, her | |||||
sue | | |||||
nostro | our | |||||
nostra | | |||||
nostri | | |||||
nostre | | |||||
vostro | your | |||||
vostra | | |||||
vostri | | |||||
vostre | | |||||
mi | me | |||||
ti | thee | |||||
ci | us, there | |||||
vi | you, there | |||||
lo | him, the | |||||
la | her, the | |||||
li | them | |||||
le | them, the | |||||
gli | to him, the | |||||
ne | from there etc | |||||
il | the | |||||
un | a | |||||
uno | a | |||||
una | a | |||||
ma | but | |||||
ed | and | |||||
se | if | |||||
perché | why, because | |||||
anche | also | |||||
come | how | |||||
dov | where (as dov') | |||||
dove | where | |||||
che | who, that | |||||
chi | who | |||||
cui | whom | |||||
non | not | |||||
più | more | |||||
quale | who, that | |||||
quanto | how much | |||||
quanti | | |||||
quanta | | |||||
quante | | |||||
quello | that | |||||
quelli | | |||||
quella | | |||||
quelle | | |||||
questo | this | |||||
questi | | |||||
questa | | |||||
queste | | |||||
si | yes | |||||
tutto | all | |||||
tutti | all | |||||
| single letter forms: | |||||
a | at | |||||
c | as c' for ce or ci | |||||
e | and | |||||
i | the | |||||
l | as l' | |||||
o | or | |||||
| forms of avere, to have (not including the infinitive): | |||||
ho | |||||
hai | |||||
ha | |||||
abbiamo | |||||
avete | |||||
hanno | |||||
abbia | |||||
abbiate | |||||
abbiano | |||||
avrò | |||||
avrai | |||||
avrà | |||||
avremo | |||||
avrete | |||||
avranno | |||||
avrei | |||||
avresti | |||||
avrebbe | |||||
avremmo | |||||
avreste | |||||
avrebbero | |||||
avevo | |||||
avevi | |||||
aveva | |||||
avevamo | |||||
avevate | |||||
avevano | |||||
ebbi | |||||
avesti | |||||
ebbe | |||||
avemmo | |||||
aveste | |||||
ebbero | |||||
avessi | |||||
avesse | |||||
avessimo | |||||
avessero | |||||
avendo | |||||
avuto | |||||
avuta | |||||
avuti | |||||
avute | |||||
| forms of essere, to be (not including the infinitive): | |||||
sono | |||||
sei | |||||
è | |||||
siamo | |||||
siete | |||||
sia | |||||
siate | |||||
siano | |||||
sarò | |||||
sarai | |||||
sarà | |||||
saremo | |||||
sarete | |||||
saranno | |||||
sarei | |||||
saresti | |||||
sarebbe | |||||
saremmo | |||||
sareste | |||||
sarebbero | |||||
ero | |||||
eri | |||||
era | |||||
eravamo | |||||
eravate | |||||
erano | |||||
fui | |||||
fosti | |||||
fu | |||||
fummo | |||||
foste | |||||
furono | |||||
fossi | |||||
fosse | |||||
fossimo | |||||
fossero | |||||
essendo | |||||
| forms of fare, to do (not including the infinitive, fa, fat-): | |||||
faccio | |||||
fai | |||||
facciamo | |||||
fanno | |||||
faccia | |||||
facciate | |||||
facciano | |||||
farò | |||||
farai | |||||
farà | |||||
faremo | |||||
farete | |||||
faranno | |||||
farei | |||||
faresti | |||||
farebbe | |||||
faremmo | |||||
fareste | |||||
farebbero | |||||
facevo | |||||
facevi | |||||
faceva | |||||
facevamo | |||||
facevate | |||||
facevano | |||||
feci | |||||
facesti | |||||
fece | |||||
facemmo | |||||
faceste | |||||
fecero | |||||
facessi | |||||
facesse | |||||
facessimo | |||||
facessero | |||||
facendo | |||||
| forms of stare, to be (not including the infinitive): | |||||
sto | |||||
stai | |||||
sta | |||||
stiamo | |||||
stanno | |||||
stia | |||||
stiate | |||||
stiano | |||||
starò | |||||
starai | |||||
starà | |||||
staremo | |||||
starete | |||||
staranno | |||||
starei | |||||
staresti | |||||
starebbe | |||||
staremmo | |||||
stareste | |||||
starebbero | |||||
stavo | |||||
stavi | |||||
stava | |||||
stavamo | |||||
stavate | |||||
stavano | |||||
stetti | |||||
stesti | |||||
stette | |||||
stemmo | |||||
steste | |||||
stettero | |||||
stessi | |||||
stesse | |||||
stessimo | |||||
stessero | |||||
stando |
# | |||||
# This file defines a stopword set for Japanese. | |||||
# | |||||
# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. | |||||
# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 | |||||
# for frequency lists, etc. that can be useful for making your own set (if desired) | |||||
# | |||||
# Note that there is an overlap between these stopwords and the terms stopped when used | |||||
# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note | |||||
# that comments are not allowed on the same line as stopwords. | |||||
# | |||||
# Also note that stopping is done in a case-insensitive manner. Change your StopFilter | |||||
# configuration if you need case-sensitive stopping. Lastly, note that stopping is done | |||||
# using the same character width as the entries in this file. Since this StopFilter is | |||||
# normally done after a CJKWidthFilter in your chain, you would usually want your romaji | |||||
# entries to be in half-width and your kana entries to be in full-width. | |||||
# | |||||
の | |||||
に | |||||
は | |||||
を | |||||
た | |||||
が | |||||
で | |||||
て | |||||
と | |||||
し | |||||
れ | |||||
さ | |||||
ある | |||||
いる | |||||
も | |||||
する | |||||
から | |||||
な | |||||
こと | |||||
として | |||||
い | |||||
や | |||||
れる | |||||
など | |||||
なっ | |||||
ない | |||||
この | |||||
ため | |||||
その | |||||
あっ | |||||
よう | |||||
また | |||||
もの | |||||
という | |||||
あり | |||||
まで | |||||
られ | |||||
なる | |||||
へ | |||||
か | |||||
だ | |||||
これ | |||||
によって | |||||
により | |||||
おり | |||||
より | |||||
による | |||||
ず | |||||
なり | |||||
られる | |||||
において | |||||
ば | |||||
なかっ | |||||
なく | |||||
しかし | |||||
について | |||||
せ | |||||
だっ | |||||
その後 | |||||
できる | |||||
それ | |||||
う | |||||
ので | |||||
なお | |||||
のみ | |||||
でき | |||||
き | |||||
つ | |||||
における | |||||
および | |||||
いう | |||||
さらに | |||||
でも | |||||
ら | |||||
たり | |||||
その他 | |||||
に関する | |||||
たち | |||||
ます | |||||
ん | |||||
なら | |||||
に対して | |||||
特に | |||||
せる | |||||
及び | |||||
これら | |||||
とき | |||||
では | |||||
にて | |||||
ほか | |||||
ながら | |||||
うち | |||||
そして | |||||
とともに | |||||
ただし | |||||
かつて | |||||
それぞれ | |||||
または | |||||
お | |||||
ほど | |||||
ものの | |||||
に対する | |||||
ほとんど | |||||
と共に | |||||
といった | |||||
です | |||||
とも | |||||
ところ | |||||
ここ | |||||
##### End of file |
# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins | |||||
# the original list of over 800 forms was refined: | |||||
# pronouns, adverbs, interjections were removed | |||||
# | |||||
# prepositions | |||||
aiz | |||||
ap | |||||
ar | |||||
apakš | |||||
ārpus | |||||
augšpus | |||||
bez | |||||
caur | |||||
dēļ | |||||
gar | |||||
iekš | |||||
iz | |||||
kopš | |||||
labad | |||||
lejpus | |||||
līdz | |||||
no | |||||
otrpus | |||||
pa | |||||
par | |||||
pār | |||||
pēc | |||||
pie | |||||
pirms | |||||
pret | |||||
priekš | |||||
starp | |||||
šaipus | |||||
uz | |||||
viņpus | |||||
virs | |||||
virspus | |||||
zem | |||||
apakšpus | |||||
# Conjunctions | |||||
un | |||||
bet | |||||
jo | |||||
ja | |||||
ka | |||||
lai | |||||
tomēr | |||||
tikko | |||||
turpretī | |||||
arī | |||||
kaut | |||||
gan | |||||
tādēļ | |||||
tā | |||||
ne | |||||
tikvien | |||||
vien | |||||
kā | |||||
ir | |||||
te | |||||
vai | |||||
kamēr | |||||
# Particles | |||||
ar | |||||
diezin | |||||
droši | |||||
diemžēl | |||||
nebūt | |||||
ik | |||||
it | |||||
taču | |||||
nu | |||||
pat | |||||
tiklab | |||||
iekšpus | |||||
nedz | |||||
tik | |||||
nevis | |||||
turpretim | |||||
jeb | |||||
iekam | |||||
iekām | |||||
iekāms | |||||
kolīdz | |||||
līdzko | |||||
tiklīdz | |||||
jebšu | |||||
tālab | |||||
tāpēc | |||||
nekā | |||||
itin | |||||
jā | |||||
jau | |||||
jel | |||||
nē | |||||
nezin | |||||
tad | |||||
tikai | |||||
vis | |||||
tak | |||||
iekams | |||||
vien | |||||
# modal verbs | |||||
būt | |||||
biju | |||||
biji | |||||
bija | |||||
bijām | |||||
bijāt | |||||
esmu | |||||
esi | |||||
esam | |||||
esat | |||||
būšu | |||||
būsi | |||||
būs | |||||
būsim | |||||
būsiet | |||||
tikt | |||||
tiku | |||||
tiki | |||||
tika | |||||
tikām | |||||
tikāt | |||||
tieku | |||||
tiec | |||||
tiek | |||||
tiekam | |||||
tiekat | |||||
tikšu | |||||
tiks | |||||
tiksim | |||||
tiksiet | |||||
tapt | |||||
tapi | |||||
tapāt | |||||
topat | |||||
tapšu | |||||
tapsi | |||||
taps | |||||
tapsim | |||||
tapsiet | |||||
kļūt | |||||
kļuvu | |||||
kļuvi | |||||
kļuva | |||||
kļuvām | |||||
kļuvāt | |||||
kļūstu | |||||
kļūsti | |||||
kļūst | |||||
kļūstam | |||||
kļūstat | |||||
kļūšu | |||||
kļūsi | |||||
kļūs | |||||
kļūsim | |||||
kļūsiet | |||||
# verbs | |||||
varēt | |||||
varēju | |||||
varējām | |||||
varēšu | |||||
varēsim | |||||
var | |||||
varēji | |||||
varējāt | |||||
varēsi | |||||
varēsiet | |||||
varat | |||||
varēja | |||||
varēs |
| From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt | |||||
| This file is distributed under the BSD License. | |||||
| See http://snowball.tartarus.org/license.php | |||||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| - Encoding was converted to UTF-8. | |||||
| - This notice was added. | |||||
| | |||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| A Dutch stop word list. Comments begin with vertical bar. Each stop | |||||
| word is at the start of a line. | |||||
| This is a ranked list (commonest to rarest) of stopwords derived from | |||||
| a large sample of Dutch text. | |||||
| Dutch stop words frequently exhibit homonym clashes. These are indicated | |||||
| clearly below. | |||||
de | the | |||||
en | and | |||||
van | of, from | |||||
ik | I, the ego | |||||
te | (1) chez, at etc, (2) to, (3) too | |||||
dat | that, which | |||||
die | that, those, who, which | |||||
in | in, inside | |||||
een | a, an, one | |||||
hij | he | |||||
het | the, it | |||||
niet | not, nothing, naught | |||||
zijn | (1) to be, being, (2) his, one's, its | |||||
is | is | |||||
was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river | |||||
op | on, upon, at, in, up, used up | |||||
aan | on, upon, to (as dative) | |||||
met | with, by | |||||
als | like, such as, when | |||||
voor | (1) before, in front of, (2) furrow | |||||
had | had, past tense all persons sing. of 'hebben' (have) | |||||
er | there | |||||
maar | but, only | |||||
om | round, about, for etc | |||||
hem | him | |||||
dan | then | |||||
zou | should/would, past tense all persons sing. of 'zullen' | |||||
of | or, whether, if | |||||
wat | what, something, anything | |||||
mijn | possessive and noun 'mine' | |||||
men | people, 'one' | |||||
dit | this | |||||
zo | so, thus, in this way | |||||
door | through by | |||||
over | over, across | |||||
ze | she, her, they, them | |||||
zich | oneself | |||||
bij | (1) a bee, (2) by, near, at | |||||
ook | also, too | |||||
tot | till, until | |||||
je | you | |||||
mij | me | |||||
uit | out of, from | |||||
der | Old Dutch form of 'van der' still found in surnames | |||||
daar | (1) there, (2) because | |||||
haar | (1) her, their, them, (2) hair | |||||
naar | (1) unpleasant, unwell etc, (2) towards, (3) as | |||||
heb | present first person sing. of 'to have' | |||||
hoe | how, why | |||||
heeft | present third person sing. of 'to have' | |||||
hebben | 'to have' and various parts thereof | |||||
deze | this | |||||
u | you | |||||
want | (1) for, (2) mitten, (3) rigging | |||||
nog | yet, still | |||||
zal | 'shall', first and third person sing. of verb 'zullen' (will) | |||||
me | me | |||||
zij | she, they | |||||
nu | now | |||||
ge | 'thou', still used in Belgium and south Netherlands | |||||
geen | none | |||||
omdat | because | |||||
iets | something, somewhat | |||||
worden | to become, grow, get | |||||
toch | yet, still | |||||
al | all, every, each | |||||
waren | (1) 'were', (2) to wander, (3) wares | |||||
veel | much, many | |||||
meer | (1) more, (2) lake | |||||
doen | to do, to make | |||||
toen | then, when | |||||
moet | noun 'spot/mote' and present form of 'to must' | |||||
ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' | |||||
zonder | without | |||||
kan | noun 'can' and present form of 'to be able' | |||||
hun | their, them | |||||
dus | so, consequently | |||||
alles | all, everything, anything | |||||
onder | under, beneath | |||||
ja | yes, of course | |||||
eens | once, one day | |||||
hier | here | |||||
wie | who | |||||
werd | imperfect third person sing. of 'become' | |||||
altijd | always | |||||
doch | yet, but etc | |||||
wordt | present third person sing. of 'become' | |||||
wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans | |||||
kunnen | to be able | |||||
ons | us/our | |||||
zelf | self | |||||
tegen | against, towards, at | |||||
na | after, near | |||||
reeds | already | |||||
wil | (1) present tense of 'want', (2) 'will', noun, (3) fender | |||||
kon | could; past tense of 'to be able' | |||||
niets | nothing | |||||
uw | your | |||||
iemand | somebody | |||||
geweest | been; past participle of 'be' | |||||
andere | other |
| From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt | |||||
| This file is distributed under the BSD License. | |||||
| See http://snowball.tartarus.org/license.php | |||||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| - Encoding was converted to UTF-8. | |||||
| - This notice was added. | |||||
| | |||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| A Norwegian stop word list. Comments begin with vertical bar. Each stop | |||||
| word is at the start of a line. | |||||
| This stop word list is for the dominant bokmål dialect. Words unique | |||||
| to nynorsk are marked *. | |||||
| Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005 | |||||
og | and | |||||
i | in | |||||
jeg | I | |||||
det | it/this/that | |||||
at | to (w. inf.) | |||||
en | a/an | |||||
et | a/an | |||||
den | it/this/that | |||||
til | to | |||||
er | is/am/are | |||||
som | who/that | |||||
på | on | |||||
de | they / you(formal) | |||||
med | with | |||||
han | he | |||||
av | of | |||||
ikke | not | |||||
ikkje | not * | |||||
der | there | |||||
så | so | |||||
var | was/were | |||||
meg | me | |||||
seg | you | |||||
men | but | |||||
ett | one | |||||
har | have | |||||
om | about | |||||
vi | we | |||||
min | my | |||||
mitt | my | |||||
ha | have | |||||
hadde | had | |||||
hun | she | |||||
nå | now | |||||
over | over | |||||
da | when/as | |||||
ved | by/know | |||||
fra | from | |||||
du | you | |||||
ut | out | |||||
sin | your | |||||
dem | them | |||||
oss | us | |||||
opp | up | |||||
man | you/one | |||||
kan | can | |||||
hans | his | |||||
hvor | where | |||||
eller | or | |||||
hva | what | |||||
skal | shall/must | |||||
selv | self (reflective) | |||||
sjøl | self (reflective) | |||||
her | here | |||||
alle | all | |||||
vil | will | |||||
bli | become | |||||
ble | became | |||||
blei | became * | |||||
blitt | have become | |||||
kunne | could | |||||
inn | in | |||||
når | when | |||||
være | be | |||||
kom | come | |||||
noen | some | |||||
noe | some | |||||
ville | would | |||||
dere | you | |||||
som | who/which/that | |||||
deres | their/theirs | |||||
kun | only/just | |||||
ja | yes | |||||
etter | after | |||||
ned | down | |||||
skulle | should | |||||
denne | this | |||||
for | for/because | |||||
deg | you | |||||
si | hers/his | |||||
sine | hers/his | |||||
sitt | hers/his | |||||
mot | against | |||||
å | to | |||||
meget | much | |||||
hvorfor | why | |||||
dette | this | |||||
disse | these/those | |||||
uten | without | |||||
hvordan | how | |||||
ingen | none | |||||
din | your | |||||
ditt | your | |||||
blir | become | |||||
samme | same | |||||
hvilken | which | |||||
hvilke | which (plural) | |||||
sånn | such a | |||||
inni | inside/within | |||||
mellom | between | |||||
vår | our | |||||
hver | each | |||||
hvem | who | |||||
vors | us/ours | |||||
hvis | whose | |||||
både | both | |||||
bare | only/just | |||||
enn | than | |||||
fordi | as/because | |||||
før | before | |||||
mange | many | |||||
også | also | |||||
slik | just | |||||
vært | been | |||||
være | to be | |||||
båe | both * | |||||
begge | both | |||||
siden | since | |||||
dykk | your * | |||||
dykkar | yours * | |||||
dei | they * | |||||
deira | them * | |||||
deires | theirs * | |||||
deim | them * | |||||
di | your (fem.) * | |||||
då | as/when * | |||||
eg | I * | |||||
ein | a/an * | |||||
eit | a/an * | |||||
eitt | a/an * | |||||
elles | or * | |||||
honom | he * | |||||
hjå | at * | |||||
ho | she * | |||||
hoe | she * | |||||
henne | her | |||||
hennar | her/hers | |||||
hennes | hers | |||||
hoss | how * | |||||
hossen | how * | |||||
ikkje | not * | |||||
ingi | noone * | |||||
inkje | noone * | |||||
korleis | how * | |||||
korso | how * | |||||
kva | what/which * | |||||
kvar | where * | |||||
kvarhelst | where * | |||||
kven | who/whom * | |||||
kvi | why * | |||||
kvifor | why * | |||||
me | we * | |||||
medan | while * | |||||
mi | my * | |||||
mine | my * | |||||
mykje | much * | |||||
no | now * | |||||
nokon | some (masc./neut.) * | |||||
noka | some (fem.) * | |||||
nokor | some * | |||||
noko | some * | |||||
nokre | some * | |||||
si | his/hers * | |||||
sia | since * | |||||
sidan | since * | |||||
so | so * | |||||
somt | some * | |||||
somme | some * | |||||
um | about* | |||||
upp | up * | |||||
vere | be * | |||||
vore | was * | |||||
verte | become * | |||||
vort | become * | |||||
varte | became * | |||||
vart | became * | |||||
| From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt | |||||
| This file is distributed under the BSD License. | |||||
| See http://snowball.tartarus.org/license.php | |||||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| - Encoding was converted to UTF-8. | |||||
| - This notice was added. | |||||
| | |||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| A Portuguese stop word list. Comments begin with vertical bar. Each stop | |||||
| word is at the start of a line. | |||||
| The following is a ranked list (commonest to rarest) of stopwords | |||||
| deriving from a large sample of text. | |||||
| Extra words have been added at the end. | |||||
de | of, from | |||||
a | the; to, at; her | |||||
o | the; him | |||||
que | who, that | |||||
e | and | |||||
do | de + o | |||||
da | de + a | |||||
em | in | |||||
um | a | |||||
para | for | |||||
| é from SER | |||||
com | with | |||||
não | not, no | |||||
uma | a | |||||
os | the; them | |||||
no | em + o | |||||
se | himself etc | |||||
na | em + a | |||||
por | for | |||||
mais | more | |||||
as | the; them | |||||
dos | de + os | |||||
como | as, like | |||||
mas | but | |||||
| foi from SER | |||||
ao | a + o | |||||
ele | he | |||||
das | de + as | |||||
| tem from TER | |||||
à | a + a | |||||
seu | his | |||||
sua | her | |||||
ou | or | |||||
| ser from SER | |||||
quando | when | |||||
muito | much | |||||
| há from HAV | |||||
nos | em + os; us | |||||
já | already, now | |||||
| está from EST | |||||
eu | I | |||||
também | also | |||||
só | only, just | |||||
pelo | per + o | |||||
pela | per + a | |||||
até | up to | |||||
isso | that | |||||
ela | she | |||||
entre | between | |||||
| era from SER | |||||
depois | after | |||||
sem | without | |||||
mesmo | same | |||||
aos | a + os | |||||
| ter from TER | |||||
seus | his | |||||
quem | whom | |||||
nas | em + as | |||||
me | me | |||||
esse | that | |||||
eles | they | |||||
| estão from EST | |||||
você | you | |||||
| tinha from TER | |||||
| foram from SER | |||||
essa | that | |||||
num | em + um | |||||
nem | nor | |||||
suas | her | |||||
meu | my | |||||
às | a + as | |||||
minha | my | |||||
| têm from TER | |||||
numa | em + uma | |||||
pelos | per + os | |||||
elas | they | |||||
| havia from HAV | |||||
| seja from SER | |||||
qual | which | |||||
| será from SER | |||||
nós | we | |||||
| tenho from TER | |||||
lhe | to him, her | |||||
deles | of them | |||||
essas | those | |||||
esses | those | |||||
pelas | per + as | |||||
este | this | |||||
| fosse from SER | |||||
dele | of him | |||||
| other words. There are many contractions such as naquele = em+aquele, | |||||
| mo = me+o, but they are rare. | |||||
| Indefinite article plural forms are also rare. | |||||
tu | thou | |||||
te | thee | |||||
vocês | you (plural) | |||||
vos | you | |||||
lhes | to them | |||||
meus | my | |||||
minhas | |||||
teu | thy | |||||
tua | |||||
teus | |||||
tuas | |||||
nosso | our | |||||
nossa | |||||
nossos | |||||
nossas | |||||
dela | of her | |||||
delas | of them | |||||
esta | this | |||||
estes | these | |||||
estas | these | |||||
aquele | that | |||||
aquela | that | |||||
aqueles | those | |||||
aquelas | those | |||||
isto | this | |||||
aquilo | that | |||||
| forms of estar, to be (not including the infinitive): | |||||
estou | |||||
está | |||||
estamos | |||||
estão | |||||
estive | |||||
esteve | |||||
estivemos | |||||
estiveram | |||||
estava | |||||
estávamos | |||||
estavam | |||||
estivera | |||||
estivéramos | |||||
esteja | |||||
estejamos | |||||
estejam | |||||
estivesse | |||||
estivéssemos | |||||
estivessem | |||||
estiver | |||||
estivermos | |||||
estiverem | |||||
| forms of haver, to have (not including the infinitive): | |||||
hei | |||||
há | |||||
havemos | |||||
hão | |||||
houve | |||||
houvemos | |||||
houveram | |||||
houvera | |||||
houvéramos | |||||
haja | |||||
hajamos | |||||
hajam | |||||
houvesse | |||||
houvéssemos | |||||
houvessem | |||||
houver | |||||
houvermos | |||||
houverem | |||||
houverei | |||||
houverá | |||||
houveremos | |||||
houverão | |||||
houveria | |||||
houveríamos | |||||
houveriam | |||||
| forms of ser, to be (not including the infinitive): | |||||
sou | |||||
somos | |||||
são | |||||
era | |||||
éramos | |||||
eram | |||||
fui | |||||
foi | |||||
fomos | |||||
foram | |||||
fora | |||||
fôramos | |||||
seja | |||||
sejamos | |||||
sejam | |||||
fosse | |||||
fôssemos | |||||
fossem | |||||
for | |||||
formos | |||||
forem | |||||
serei | |||||
será | |||||
seremos | |||||
serão | |||||
seria | |||||
seríamos | |||||
seriam | |||||
| forms of ter, to have (not including the infinitive): | |||||
tenho | |||||
tem | |||||
temos | |||||
tém | |||||
tinha | |||||
tínhamos | |||||
tinham | |||||
tive | |||||
teve | |||||
tivemos | |||||
tiveram | |||||
tivera | |||||
tivéramos | |||||
tenha | |||||
tenhamos | |||||
tenham | |||||
tivesse | |||||
tivéssemos | |||||
tivessem | |||||
tiver | |||||
tivermos | |||||
tiverem | |||||
terei | |||||
terá | |||||
teremos | |||||
terão | |||||
teria | |||||
teríamos | |||||
teriam |
# This file was created by Jacques Savoy and is distributed under the BSD license. | |||||
# See http://members.unine.ch/jacques.savoy/clef/index.html. | |||||
# Also see http://www.opensource.org/licenses/bsd-license.html | |||||
acea | |||||
aceasta | |||||
această | |||||
aceea | |||||
acei | |||||
aceia | |||||
acel | |||||
acela | |||||
acele | |||||
acelea | |||||
acest | |||||
acesta | |||||
aceste | |||||
acestea | |||||
aceşti | |||||
aceştia | |||||
acolo | |||||
acum | |||||
ai | |||||
aia | |||||
aibă | |||||
aici | |||||
al | |||||
ăla | |||||
ale | |||||
alea | |||||
ălea | |||||
altceva | |||||
altcineva | |||||
am | |||||
ar | |||||
are | |||||
aş | |||||
aşadar | |||||
asemenea | |||||
asta | |||||
ăsta | |||||
astăzi | |||||
astea | |||||
ăstea | |||||
ăştia | |||||
asupra | |||||
aţi | |||||
au | |||||
avea | |||||
avem | |||||
aveţi | |||||
azi | |||||
bine | |||||
bucur | |||||
bună | |||||
ca | |||||
că | |||||
căci | |||||
când | |||||
care | |||||
cărei | |||||
căror | |||||
cărui | |||||
cât | |||||
câte | |||||
câţi | |||||
către | |||||
câtva | |||||
ce | |||||
cel | |||||
ceva | |||||
chiar | |||||
cînd | |||||
cine | |||||
cineva | |||||
cît | |||||
cîte | |||||
cîţi | |||||
cîtva | |||||
contra | |||||
cu | |||||
cum | |||||
cumva | |||||
curând | |||||
curînd | |||||
da | |||||
dă | |||||
dacă | |||||
dar | |||||
datorită | |||||
de | |||||
deci | |||||
deja | |||||
deoarece | |||||
departe | |||||
deşi | |||||
din | |||||
dinaintea | |||||
dintr | |||||
dintre | |||||
drept | |||||
după | |||||
ea | |||||
ei | |||||
el | |||||
ele | |||||
eram | |||||
este | |||||
eşti | |||||
eu | |||||
face | |||||
fără | |||||
fi | |||||
fie | |||||
fiecare | |||||
fii | |||||
fim | |||||
fiţi | |||||
iar | |||||
ieri | |||||
îi | |||||
îl | |||||
îmi | |||||
împotriva | |||||
în | |||||
înainte | |||||
înaintea | |||||
încât | |||||
încît | |||||
încotro | |||||
între | |||||
întrucât | |||||
întrucît | |||||
îţi | |||||
la | |||||
lângă | |||||
le | |||||
li | |||||
lîngă | |||||
lor | |||||
lui | |||||
mă | |||||
mâine | |||||
mea | |||||
mei | |||||
mele | |||||
mereu | |||||
meu | |||||
mi | |||||
mine | |||||
mult | |||||
multă | |||||
mulţi | |||||
ne | |||||
nicăieri | |||||
nici | |||||
nimeni | |||||
nişte | |||||
noastră | |||||
noastre | |||||
noi | |||||
noştri | |||||
nostru | |||||
nu | |||||
ori | |||||
oricând | |||||
oricare | |||||
oricât | |||||
orice | |||||
oricînd | |||||
oricine | |||||
oricît | |||||
oricum | |||||
oriunde | |||||
până | |||||
pe | |||||
pentru | |||||
peste | |||||
pînă | |||||
poate | |||||
pot | |||||
prea | |||||
prima | |||||
primul | |||||
prin | |||||
printr | |||||
sa | |||||
să | |||||
săi | |||||
sale | |||||
sau | |||||
său | |||||
se | |||||
şi | |||||
sînt | |||||
sîntem | |||||
sînteţi | |||||
spre | |||||
sub | |||||
sunt | |||||
suntem | |||||
sunteţi | |||||
ta | |||||
tăi | |||||
tale | |||||
tău | |||||
te | |||||
ţi | |||||
ţie | |||||
tine | |||||
toată | |||||
toate | |||||
tot | |||||
toţi | |||||
totuşi | |||||
tu | |||||
un | |||||
una | |||||
unde | |||||
undeva | |||||
unei | |||||
unele | |||||
uneori | |||||
unor | |||||
vă | |||||
vi | |||||
voastră | |||||
voastre | |||||
voi | |||||
voştri | |||||
vostru | |||||
vouă | |||||
vreo | |||||
vreun |
| From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt | |||||
| This file is distributed under the BSD License. | |||||
| See http://snowball.tartarus.org/license.php | |||||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| - Encoding was converted to UTF-8. | |||||
| - This notice was added. | |||||
| | |||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| a russian stop word list. comments begin with vertical bar. each stop | |||||
| word is at the start of a line. | |||||
| this is a ranked list (commonest to rarest) of stopwords derived from | |||||
| a large text sample. | |||||
| letter `ё' is translated to `е'. | |||||
и | and | |||||
в | in/into | |||||
во | alternative form | |||||
не | not | |||||
что | what/that | |||||
он | he | |||||
на | on/onto | |||||
я | i | |||||
с | from | |||||
со | alternative form | |||||
как | how | |||||
а | milder form of `no' (but) | |||||
то | conjunction and form of `that' | |||||
все | all | |||||
она | she | |||||
так | so, thus | |||||
его | him | |||||
но | but | |||||
да | yes/and | |||||
ты | thou | |||||
к | towards, by | |||||
у | around, chez | |||||
же | intensifier particle | |||||
вы | you | |||||
за | beyond, behind | |||||
бы | conditional/subj. particle | |||||
по | up to, along | |||||
только | only | |||||
ее | her | |||||
мне | to me | |||||
было | it was | |||||
вот | here is/are, particle | |||||
от | away from | |||||
меня | me | |||||
еще | still, yet, more | |||||
нет | no, there isnt/arent | |||||
о | about | |||||
из | out of | |||||
ему | to him | |||||
теперь | now | |||||
когда | when | |||||
даже | even | |||||
ну | so, well | |||||
вдруг | suddenly | |||||
ли | interrogative particle | |||||
если | if | |||||
уже | already, but homonym of `narrower' | |||||
или | or | |||||
ни | neither | |||||
быть | to be | |||||
был | he was | |||||
него | prepositional form of его | |||||
до | up to | |||||
вас | you accusative | |||||
нибудь | indef. suffix preceded by hyphen | |||||
опять | again | |||||
уж | already, but homonym of `adder' | |||||
вам | to you | |||||
сказал | he said | |||||
ведь | particle `after all' | |||||
там | there | |||||
потом | then | |||||
себя | oneself | |||||
ничего | nothing | |||||
ей | to her | |||||
может | usually with `быть' as `maybe' | |||||
они | they | |||||
тут | here | |||||
где | where | |||||
есть | there is/are | |||||
надо | got to, must | |||||
ней | prepositional form of ей | |||||
для | for | |||||
мы | we | |||||
тебя | thee | |||||
их | them, their | |||||
чем | than | |||||
была | she was | |||||
сам | self | |||||
чтоб | in order to | |||||
без | without | |||||
будто | as if | |||||
человек | man, person, one | |||||
чего | genitive form of `what' | |||||
раз | once | |||||
тоже | also | |||||
себе | to oneself | |||||
под | beneath | |||||
жизнь | life | |||||
будет | will be | |||||
ж | short form of intensifier particle `же' | |||||
тогда | then | |||||
кто | who | |||||
этот | this | |||||
говорил | was saying | |||||
того | genitive form of `that' | |||||
потому | for that reason | |||||
этого | genitive form of `this' | |||||
какой | which | |||||
совсем | altogether | |||||
ним | prepositional form of `его', `они' | |||||
здесь | here | |||||
этом | prepositional form of `этот' | |||||
один | one | |||||
почти | almost | |||||
мой | my | |||||
тем | instrumental/dative plural of `тот', `то' | |||||
чтобы | full form of `in order that' | |||||
нее | her (acc.) | |||||
кажется | it seems | |||||
сейчас | now | |||||
были | they were | |||||
куда | where to | |||||
зачем | why | |||||
сказать | to say | |||||
всех | all (acc., gen. preposn. plural) | |||||
никогда | never | |||||
сегодня | today | |||||
можно | possible, one can | |||||
при | by | |||||
наконец | finally | |||||
два | two | |||||
об | alternative form of `о', about | |||||
другой | another | |||||
хоть | even | |||||
после | after | |||||
над | above | |||||
больше | more | |||||
тот | that one (masc.) | |||||
через | across, in | |||||
эти | these | |||||
нас | us | |||||
про | about | |||||
всего | in all, only, of all | |||||
них | prepositional form of `они' (they) | |||||
какая | which, feminine | |||||
много | lots | |||||
разве | interrogative particle | |||||
сказала | she said | |||||
три | three | |||||
эту | this, acc. fem. sing. | |||||
моя | my, feminine | |||||
впрочем | moreover, besides | |||||
хорошо | good | |||||
свою | ones own, acc. fem. sing. | |||||
этой | oblique form of `эта', fem. `this' | |||||
перед | in front of | |||||
иногда | sometimes | |||||
лучше | better | |||||
чуть | a little | |||||
том | preposn. form of `that one' | |||||
нельзя | one must not | |||||
такой | such a one | |||||
им | to them | |||||
более | more | |||||
всегда | always | |||||
конечно | of course | |||||
всю | acc. fem. sing of `all' | |||||
между | between | |||||
| b: some paradigms | |||||
| | |||||
| personal pronouns | |||||
| | |||||
| я меня мне мной [мною] | |||||
| ты тебя тебе тобой [тобою] | |||||
| он его ему им [него, нему, ним] | |||||
| она ее эи ею [нее, нэи, нею] | |||||
| оно его ему им [него, нему, ним] | |||||
| | |||||
| мы нас нам нами | |||||
| вы вас вам вами | |||||
| они их им ими [них, ним, ними] | |||||
| | |||||
| себя себе собой [собою] | |||||
| | |||||
| demonstrative pronouns: этот (this), тот (that) | |||||
| | |||||
| этот эта это эти | |||||
| этого эты это эти | |||||
| этого этой этого этих | |||||
| этому этой этому этим | |||||
| этим этой этим [этою] этими | |||||
| этом этой этом этих | |||||
| | |||||
| тот та то те | |||||
| того ту то те | |||||
| того той того тех | |||||
| тому той тому тем | |||||
| тем той тем [тою] теми | |||||
| том той том тех | |||||
| | |||||
| determinative pronouns | |||||
| | |||||
| (a) весь (all) | |||||
| | |||||
| весь вся все все | |||||
| всего всю все все | |||||
| всего всей всего всех | |||||
| всему всей всему всем | |||||
| всем всей всем [всею] всеми | |||||
| всем всей всем всех | |||||
| | |||||
| (b) сам (himself etc) | |||||
| | |||||
| сам сама само сами | |||||
| самого саму само самих | |||||
| самого самой самого самих | |||||
| самому самой самому самим | |||||
| самим самой самим [самою] самими | |||||
| самом самой самом самих | |||||
| | |||||
| stems of verbs `to be', `to have', `to do' and modal | |||||
| | |||||
| быть бы буд быв есть суть | |||||
| име | |||||
| дел | |||||
| мог мож мочь | |||||
| уме | |||||
| хоч хот | |||||
| долж | |||||
| можн | |||||
| нужн | |||||
| нельзя | |||||
| From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt | |||||
| This file is distributed under the BSD License. | |||||
| See http://snowball.tartarus.org/license.php | |||||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||||
| - Encoding was converted to UTF-8. | |||||
| - This notice was added. | |||||
| | |||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||||
| A Swedish stop word list. Comments begin with vertical bar. Each stop | |||||
| word is at the start of a line. | |||||
| This is a ranked list (commonest to rarest) of stopwords derived from | |||||
| a large text sample. | |||||
| Swedish stop words occasionally exhibit homonym clashes. For example | |||||
| så = so, but also seed. These are indicated clearly below. | |||||
och | and | |||||
det | it, this/that | |||||
att | to (with infinitive) | |||||
i | in, at | |||||
en | a | |||||
jag | I | |||||
hon | she | |||||
som | who, that | |||||
han | he | |||||
på | on | |||||
den | it, this/that | |||||
med | with | |||||
var | where, each | |||||
sig | him(self) etc | |||||
för | for | |||||
så | so (also: seed) | |||||
till | to | |||||
är | is | |||||
men | but | |||||
ett | a | |||||
om | if; around, about | |||||
hade | had | |||||
de | they, these/those | |||||
av | of | |||||
icke | not, no | |||||
mig | me | |||||
du | you | |||||
henne | her | |||||
då | then, when | |||||
sin | his | |||||
nu | now | |||||
har | have | |||||
inte | inte någon = no one | |||||
hans | his | |||||
honom | him | |||||
skulle | 'sake' | |||||
hennes | her | |||||
där | there | |||||
min | my | |||||
man | one (pronoun) | |||||
ej | nor | |||||
vid | at, by, on (also: vast) | |||||
kunde | could | |||||
något | some etc | |||||
från | from, off | |||||
ut | out | |||||
när | when | |||||
efter | after, behind | |||||
upp | up | |||||
vi | we | |||||
dem | them | |||||
vara | be | |||||
vad | what | |||||
över | over | |||||
än | than | |||||
dig | you | |||||
kan | can | |||||
sina | his | |||||
här | here | |||||
ha | have | |||||
mot | towards | |||||
alla | all | |||||
under | under (also: wonder) | |||||
någon | some etc | |||||
eller | or (else) | |||||
allt | all | |||||
mycket | much | |||||
sedan | since | |||||
ju | why | |||||
denna | this/that | |||||
själv | myself, yourself etc | |||||
detta | this/that | |||||
åt | to | |||||
utan | without | |||||
varit | was | |||||
hur | how | |||||
ingen | no | |||||
mitt | my | |||||
ni | you | |||||
bli | to be, become | |||||
blev | from bli | |||||
oss | us | |||||
din | thy | |||||
dessa | these/those | |||||
några | some etc | |||||
deras | their | |||||
blir | from bli | |||||
mina | my | |||||
samma | (the) same | |||||
vilken | who, that | |||||
er | you, your | |||||
sådan | such a | |||||
vår | our | |||||
blivit | from bli | |||||
dess | its | |||||
inom | within | |||||
mellan | between | |||||
sådant | such a | |||||
varför | why | |||||
varje | each | |||||
vilka | who, that | |||||
ditt | thy | |||||
vem | who | |||||
vilket | who, that | |||||
sitta | his | |||||
sådana | such a | |||||
vart | each | |||||
dina | thy | |||||
vars | whose | |||||
vårt | our | |||||
våra | our | |||||
ert | your | |||||
era | your | |||||
vilkas | whose | |||||
# Thai stopwords from: | |||||
# "Opinion Detection in Thai Political News Columns | |||||
# Based on Subjectivity Analysis" | |||||
# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak | |||||
ไว้ | |||||
ไม่ | |||||
ไป | |||||
ได้ | |||||
ให้ | |||||
ใน | |||||
โดย | |||||
แห่ง | |||||
แล้ว | |||||
และ | |||||
แรก | |||||
แบบ | |||||
แต่ | |||||
เอง | |||||
เห็น | |||||
เลย | |||||
เริ่ม | |||||
เรา | |||||
เมื่อ | |||||
เพื่อ | |||||
เพราะ | |||||
เป็นการ | |||||
เป็น | |||||
เปิดเผย | |||||
เปิด | |||||
เนื่องจาก | |||||
เดียวกัน | |||||
เดียว | |||||
เช่น | |||||
เฉพาะ | |||||
เคย | |||||
เข้า | |||||
เขา | |||||
อีก | |||||
อาจ | |||||
อะไร | |||||
ออก | |||||
อย่าง | |||||
อยู่ | |||||
อยาก | |||||
หาก | |||||
หลาย | |||||
หลังจาก | |||||
หลัง | |||||
หรือ | |||||
หนึ่ง | |||||
ส่วน | |||||
ส่ง | |||||
สุด | |||||
สําหรับ | |||||
ว่า | |||||
วัน | |||||
ลง | |||||
ร่วม | |||||
ราย | |||||
รับ | |||||
ระหว่าง | |||||
รวม | |||||
ยัง | |||||
มี | |||||
มาก | |||||
มา | |||||
พร้อม | |||||
พบ | |||||
ผ่าน | |||||
ผล | |||||
บาง | |||||
น่า | |||||
นี้ | |||||
นํา | |||||
นั้น | |||||
นัก | |||||
นอกจาก | |||||
ทุก | |||||
ที่สุด | |||||
ที่ | |||||
ทําให้ | |||||
ทํา | |||||
ทาง | |||||
ทั้งนี้ | |||||
ทั้ง | |||||
ถ้า | |||||
ถูก | |||||
ถึง | |||||
ต้อง | |||||
ต่างๆ | |||||
ต่าง | |||||
ต่อ | |||||
ตาม | |||||
ตั้งแต่ | |||||
ตั้ง | |||||
ด้าน | |||||
ด้วย | |||||
ดัง | |||||
ซึ่ง | |||||
ช่วง | |||||
จึง | |||||
จาก | |||||
จัด | |||||
จะ | |||||
คือ | |||||
ความ | |||||
ครั้ง | |||||
คง | |||||
ขึ้น | |||||
ของ | |||||
ขอ | |||||
ขณะ | |||||
ก่อน | |||||
ก็ | |||||
การ | |||||
กับ | |||||
กัน | |||||
กว่า | |||||
กล่าว |
# Turkish stopwords from LUCENE-559 | |||||
# merged with the list from "Information Retrieval on Turkish Texts" | |||||
# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) | |||||
acaba | |||||
altmış | |||||
altı | |||||
ama | |||||
ancak | |||||
arada | |||||
aslında | |||||
ayrıca | |||||
bana | |||||
bazı | |||||
belki | |||||
ben | |||||
benden | |||||
beni | |||||
benim | |||||
beri | |||||
beş | |||||
bile | |||||
bin | |||||
bir | |||||
birçok | |||||
biri | |||||
birkaç | |||||
birkez | |||||
birşey | |||||
birşeyi | |||||
biz | |||||
bize | |||||
bizden | |||||
bizi | |||||
bizim | |||||
böyle | |||||
böylece | |||||
bu | |||||
buna | |||||
bunda | |||||
bundan | |||||
bunlar | |||||
bunları | |||||
bunların | |||||
bunu | |||||
bunun | |||||
burada | |||||
çok | |||||
çünkü | |||||
da | |||||
daha | |||||
dahi | |||||
de | |||||
defa | |||||
değil | |||||
diğer | |||||
diye | |||||
doksan | |||||
dokuz | |||||
dolayı | |||||
dolayısıyla | |||||
dört | |||||
edecek | |||||
eden | |||||
ederek | |||||
edilecek | |||||
ediliyor | |||||
edilmesi | |||||
ediyor | |||||
eğer | |||||
elli | |||||
en | |||||
etmesi | |||||
etti | |||||
ettiği | |||||
ettiğini | |||||
gibi | |||||
göre | |||||
halen | |||||
hangi | |||||
hatta | |||||
hem | |||||
henüz | |||||
hep | |||||
hepsi | |||||
her | |||||
herhangi | |||||
herkesin | |||||
hiç | |||||
hiçbir | |||||
için | |||||
iki | |||||
ile | |||||
ilgili | |||||
ise | |||||
işte | |||||
itibaren | |||||
itibariyle | |||||
kadar | |||||
karşın | |||||
katrilyon | |||||
kendi | |||||
kendilerine | |||||
kendini | |||||
kendisi | |||||
kendisine | |||||
kendisini | |||||
kez | |||||
ki | |||||
kim | |||||
kimden | |||||
kime | |||||
kimi | |||||
kimse | |||||
kırk | |||||
milyar | |||||
milyon | |||||
mu | |||||
mü | |||||
mı | |||||
nasıl | |||||
ne | |||||
neden | |||||
nedenle | |||||
nerde | |||||
nerede | |||||
nereye | |||||
niye | |||||
niçin | |||||
o | |||||
olan | |||||
olarak | |||||
oldu | |||||
olduğu | |||||
olduğunu | |||||
olduklarını | |||||
olmadı | |||||
olmadığı | |||||
olmak | |||||
olması | |||||
olmayan | |||||
olmaz | |||||
olsa | |||||
olsun | |||||
olup | |||||
olur | |||||
olursa | |||||
oluyor | |||||
on | |||||
ona | |||||
ondan | |||||
onlar | |||||
onlardan | |||||
onları | |||||
onların | |||||
onu | |||||
onun | |||||
otuz | |||||
oysa | |||||
öyle | |||||
pek | |||||
rağmen | |||||
sadece | |||||
sanki | |||||
sekiz | |||||
seksen | |||||
sen | |||||
senden | |||||
seni | |||||
senin | |||||
siz | |||||
sizden | |||||
sizi | |||||
sizin | |||||
şey | |||||
şeyden | |||||
şeyi | |||||
şeyler | |||||
şöyle | |||||
şu | |||||
şuna | |||||
şunda | |||||
şundan | |||||
şunları | |||||
şunu | |||||
tarafından | |||||
trilyon | |||||
tüm | |||||
üç | |||||
üzere | |||||
var | |||||
vardı | |||||
ve | |||||
veya | |||||
ya | |||||
yani | |||||
yapacak | |||||
yapılan | |||||
yapılması | |||||
yapıyor | |||||
yapmak | |||||
yaptı | |||||
yaptığı | |||||
yaptığını | |||||
yaptıkları | |||||
yedi | |||||
yerine | |||||
yetmiş | |||||
yine | |||||
yirmi | |||||
yoksa | |||||
yüz | |||||
zaten |
# | |||||
# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) | |||||
# | |||||
# Add entries to this file in order to override the statistical model in terms | |||||
# of segmentation, readings and part-of-speech tags. Notice that entries do | |||||
# not have weights since they are always used when found. This is by design | |||||
# in order to maximize ease-of-use. | |||||
# | |||||
# Entries are defined using the following CSV format: | |||||
# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> | |||||
# | |||||
# Notice that a single half-width space separates tokens and readings, and | |||||
# that the number of tokens and the number of readings must match exactly. | |||||
# | |||||
# Also notice that the behavior of multiple entries with the same <text> is undefined. | |||||
# | |||||
# Whitespace only lines are ignored. Comments are not allowed on entry lines. | |||||
# | |||||
# Custom segmentation for kanji compounds | |||||
日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 | |||||
関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 | |||||
# Custom segmentation for compound katakana | |||||
トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 | |||||
ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 | |||||
# Custom reading for former sumo wrestler | |||||
朝青龍,朝青龍,アサショウリュウ,カスタム人名 |
{"params":{ | |||||
"query":{ | |||||
"defType":"edismax", | |||||
"q.alt":"*:*", | |||||
"rows":"10", | |||||
"fl":"*,score", | |||||
"":{"v":0}}, | |||||
"facets":{ | |||||
"facet":"on", | |||||
"facet.mincount":"1", | |||||
"f.doc_type.facet.mincount":"0", | |||||
"facet.field":["text_shingles","{!ex=type}doc_type", "language"], | |||||
"f.text_shingles.facet.limit":10, | |||||
"facet.query":"{!ex=type key=all_types}*:*", | |||||
"f.doc_type.facet.missing":true, | |||||
"":{"v":0}}, | |||||
"browse":{ | |||||
"type_fq":"{!field f=doc_type v=$type}", | |||||
"hl":"on", | |||||
"hl.fl":"content", | |||||
"v.locale":"${locale}", | |||||
"debug":"true", | |||||
"hl.simple.pre":"HL_START", | |||||
"hl.simple.post":"HL_END", | |||||
"echoParams": "explicit", | |||||
"_appends_": { | |||||
"fq": "{!switch v=$type tag=type case='*:*' case.all='*:*' case.unknown='-doc_type:[* TO *]' default=$type_fq}" | |||||
}, | |||||
"":{"v":0}}, | |||||
"velocity":{ | |||||
"wt":"velocity", | |||||
"v.template":"browse", | |||||
"v.layout":"layout", | |||||
"":{"v":0}}}} |
# The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
# (the "License"); you may not use this file except in compliance with | |||||
# the License. You may obtain a copy of the License at | |||||
# | |||||
# http://www.apache.org/licenses/LICENSE-2.0 | |||||
# | |||||
# Unless required by applicable law or agreed to in writing, software | |||||
# distributed under the License is distributed on an "AS IS" BASIS, | |||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
# See the License for the specific language governing permissions and | |||||
# limitations under the License. | |||||
#----------------------------------------------------------------------- | |||||
# Use a protected word file to protect against the stemmer reducing two | |||||
# unrelated words to the same base word. | |||||
# Some non-words that normally won't be encountered, | |||||
# just to test that they won't be stemmed. | |||||
dontstems | |||||
zwhacky | |||||
<?xml version="1.0" encoding="UTF-8"?> | |||||
<!-- Solr managed schema - automatically generated - DO NOT EDIT --> | |||||
<schema name="example-data-driven-schema" version="1.6"> | |||||
<uniqueKey>id</uniqueKey> | |||||
<fieldType name="ancestor_path" class="solr.TextField"> | |||||
<analyzer type="index"> | |||||
<tokenizer class="solr.KeywordTokenizerFactory"/> | |||||
</analyzer> | |||||
<analyzer type="query"> | |||||
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="binary" class="solr.BinaryField"/> | |||||
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> | |||||
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/> | |||||
<fieldType name="currency" class="solr.CurrencyFieldType" amountLongSuffix="_l_ns" codeStrSuffix="_s_ns" defaultCurrency="USD" currencyConfig="currency.xml" /> | |||||
<fieldType name="descendent_path" class="solr.TextField"> | |||||
<analyzer type="index"> | |||||
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/> | |||||
</analyzer> | |||||
<analyzer type="query"> | |||||
<tokenizer class="solr.KeywordTokenizerFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/> | |||||
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/> | |||||
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" geo="true" maxDistErr="0.001" distErrPct="0.025" distanceUnits="kilometers"/> | |||||
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.KeywordTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="phonetic_en" class="solr.TextField" indexed="true" stored="false"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/> | |||||
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> | |||||
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> | |||||
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> | |||||
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/> | |||||
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> | |||||
<fieldType name="pint" class="solr.IntPointField" docValues="true"/> | |||||
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> | |||||
<fieldType name="plong" class="solr.LongPointField" docValues="true"/> | |||||
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> | |||||
<fieldType name="point" class="solr.PointType" subFieldSuffix="_d" dimension="2"/> | |||||
<fieldType name="random" class="solr.RandomSortField" indexed="true"/> | |||||
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/> | |||||
<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true"/> | |||||
<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ar.txt" ignoreCase="true"/> | |||||
<filter class="solr.ArabicNormalizationFilterFactory"/> | |||||
<filter class="solr.ArabicStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_bg.txt" ignoreCase="true"/> | |||||
<filter class="solr.BulgarianStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_ca.txt" ignoreCase="true"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ca.txt" ignoreCase="true"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.CJKWidthFilterFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.CJKBigramFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_cz.txt" ignoreCase="true"/> | |||||
<filter class="solr.CzechStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_da.txt" ignoreCase="true"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Danish"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_de.txt" ignoreCase="true"/> | |||||
<filter class="solr.GermanNormalizationFilterFactory"/> | |||||
<filter class="solr.GermanLightStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.GreekLowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_el.txt" ignoreCase="false"/> | |||||
<filter class="solr.GreekStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer type="index"> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.EnglishPossessiveFilterFactory"/> | |||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||||
<filter class="solr.PorterStemFilterFactory"/> | |||||
</analyzer> | |||||
<analyzer type="query"> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.EnglishPossessiveFilterFactory"/> | |||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||||
<filter class="solr.PorterStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_en_splitting" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100"> | |||||
<analyzer type="index"> | |||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||||
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||||
<filter class="solr.PorterStemFilterFactory"/> | |||||
<filter class="solr.FlattenGraphFilterFactory" /> | |||||
</analyzer> | |||||
<analyzer type="query"> | |||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||||
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||||
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||||
<filter class="solr.PorterStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100"> | |||||
<analyzer type="index"> | |||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||||
<filter class="solr.SynonymGraphFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||||
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||||
<filter class="solr.EnglishMinimalStemFilterFactory"/> | |||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |||||
<filter class="solr.FlattenGraphFilterFactory" /> | |||||
</analyzer> | |||||
<analyzer type="query"> | |||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||||
<filter class="solr.SynonymGraphFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||||
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||||
<filter class="solr.EnglishMinimalStemFilterFactory"/> | |||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_es.txt" ignoreCase="true"/> | |||||
<filter class="solr.SpanishLightStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_eu.txt" ignoreCase="true"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Basque"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<charFilter class="solr.PersianCharFilterFactory"/> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.ArabicNormalizationFilterFactory"/> | |||||
<filter class="solr.PersianNormalizationFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_fa.txt" ignoreCase="true"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fi.txt" ignoreCase="true"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_fr.txt" ignoreCase="true"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fr.txt" ignoreCase="true"/> | |||||
<filter class="solr.FrenchLightStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_ga.txt" ignoreCase="true"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/hyphenations_ga.txt" ignoreCase="true"/> | |||||
<filter class="solr.IrishLowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ga.txt" ignoreCase="true"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Irish"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true"> | |||||
<analyzer type="index"> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
</analyzer> | |||||
<analyzer type="query"> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||||
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer type="index"> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.ReversedWildcardFilterFactory" maxPosQuestion="2" maxFractionAsterisk="0.33" maxPosAsterisk="3" withOriginal="true"/> | |||||
</analyzer> | |||||
<analyzer type="query"> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_gl.txt" ignoreCase="true"/> | |||||
<filter class="solr.GalicianStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.IndicNormalizationFilterFactory"/> | |||||
<filter class="solr.HindiNormalizationFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_hi.txt" ignoreCase="true"/> | |||||
<filter class="solr.HindiStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_hu.txt" ignoreCase="true"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_hy.txt" ignoreCase="true"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_id.txt" ignoreCase="true"/> | |||||
<filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt" ignoreCase="true"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_it.txt" ignoreCase="true"/> | |||||
<filter class="solr.ItalianLightStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_ja" class="solr.TextField" autoGeneratePhraseQueries="false" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> | |||||
<filter class="solr.JapaneseBaseFormFilterFactory"/> | |||||
<filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt"/> | |||||
<filter class="solr.CJKWidthFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ja.txt" ignoreCase="true"/> | |||||
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/> | |||||
<filter class="solr.KoreanPartOfSpeechStopFilterFactory" /> | |||||
<filter class="solr.KoreanReadingFormFilterFactory" /> | |||||
<filter class="solr.LowerCaseFilterFactory" /> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_lv.txt" ignoreCase="true"/> | |||||
<filter class="solr.LatvianStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_nl.txt" ignoreCase="true"/> | |||||
<filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_no.txt" ignoreCase="true"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_pt.txt" ignoreCase="true"/> | |||||
<filter class="solr.PortugueseLightStemFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ro.txt" ignoreCase="true"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_ru.txt" ignoreCase="true"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Russian"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_sv.txt" ignoreCase="true"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.ThaiTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_th.txt" ignoreCase="true"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<filter class="solr.TurkishLowerCaseFilterFactory"/> | |||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_tr.txt" ignoreCase="false"/> | |||||
<filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_email_url" class="solr.TextField"> | |||||
<analyzer> | |||||
<tokenizer class="solr.UAX29URLEmailTokenizerFactory"/> | |||||
<filter class="solr.TypeTokenFilterFactory" types="email_url_types.txt" useWhitelist="true"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<fieldType name="text_shingles" class="solr.TextField" positionIncrementGap="100" multiValued="true"> | |||||
<analyzer type="index"> | |||||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||||
<!-- <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="false" /> --> | |||||
<filter class="solr.LengthFilterFactory" min="2" max="18"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
<filter class="solr.PatternReplaceFilterFactory" pattern="(^[^a-z]+$)" replacement="" replace="all"/> | |||||
<filter class="solr.ShingleFilterFactory" minShingleSize="3" maxShingleSize="3" | |||||
outputUnigrams="false" outputUnigramsIfNoShingles="false" tokenSeparator=" " fillerToken="*"/> | |||||
<filter class="solr.PatternReplaceFilterFactory" pattern="(.*[\*].*)" replacement=""/> | |||||
<filter class="solr.TrimFilterFactory"/> | |||||
            <!-- The PatternReplaceFilterFactory (PRFF) filters above could have reduced a token to an empty string; drop it if so --> | |||||
<filter class="solr.LengthFilterFactory" min="1" max="100"/> | |||||
</analyzer> | |||||
<analyzer type="query"> | |||||
<tokenizer class="solr.KeywordTokenizerFactory"/> | |||||
<filter class="solr.LowerCaseFilterFactory"/> | |||||
</analyzer> | |||||
</fieldType> | |||||
<field name="id" type="string" multiValued="false" indexed="true" required="true" stored="true"/> | |||||
<field name="_version_" type="plong" indexed="true" stored="true"/> | |||||
<field name="content_type" type="string" indexed="true" stored="true"/> | |||||
<field name="doc_type" type="string" indexed="true" stored="true"/> | |||||
<field name="title" type="string" indexed="true" stored="true"/> | |||||
<field name="language" type="string" indexed="true" stored="true"/> | |||||
<field name="content" type="text_general" multiValued="false" indexed="true" stored="true"/> | |||||
<field name="text_shingles" type="text_shingles" indexed="true" stored="false"/> | |||||
<field name="_text_" type="text_general" multiValued="true" indexed="true" stored="false"/> | |||||
<dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/> | |||||
<dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/> | |||||
<dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/> | |||||
<dynamicField name="*_coordinate" type="pdouble" indexed="true" stored="false"/> | |||||
<dynamicField name="ignored_*" type="ignored" multiValued="true"/> | |||||
<dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/> | |||||
<dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/> | |||||
<dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_cjk" type="text_cjk" indexed="true" stored="true"/> | |||||
<dynamicField name="random_*" type="random"/> | |||||
<dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_ar" type="text_ar" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_bg" type="text_bg" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_ca" type="text_ca" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_cz" type="text_cz" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_da" type="text_da" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_de" type="text_de" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_el" type="text_el" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_es" type="text_es" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_eu" type="text_eu" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_fa" type="text_fa" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_fi" type="text_fi" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_fr" type="text_fr" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_ga" type="text_ga" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_gl" type="text_gl" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_hi" type="text_hi" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_hu" type="text_hu" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_hy" type="text_hy" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_id" type="text_id" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_it" type="text_it" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_ja" type="text_ja" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_nl" type="text_nl" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_no" type="text_no" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_ro" type="text_ro" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_ru" type="text_ru" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_sv" type="text_sv" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_th" type="text_th" indexed="true" stored="true"/> | |||||
<dynamicField name="*_txt_tr" type="text_tr" indexed="true" stored="true"/> | |||||
<dynamicField name="*_point" type="point" indexed="true" stored="true"/> | |||||
<dynamicField name="*_srpt" type="location_rpt" indexed="true" stored="true"/> | |||||
<dynamicField name="attr_*" type="text_general" multiValued="true" indexed="true" stored="true"/> | |||||
<dynamicField name="*_l_ns" type="plong" indexed="true" stored="false"/> | |||||
<dynamicField name="*_s_ns" type="string" indexed="true" stored="false"/> | |||||
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/> | |||||
<dynamicField name="*_dts" type="pdate" multiValued="true" indexed="true" stored="true"/> | |||||
<dynamicField name="*_is" type="pints" indexed="true" stored="true"/> | |||||
<dynamicField name="*_ss" type="strings" indexed="true" stored="true"/> | |||||
<dynamicField name="*_ls" type="plongs" indexed="true" stored="true"/> | |||||
<dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/> | |||||
<dynamicField name="*_fs" type="pfloats" indexed="true" stored="true"/> | |||||
<dynamicField name="*_ds" type="pdoubles" indexed="true" stored="true"/> | |||||
<dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/> | |||||
<dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/> | |||||
<dynamicField name="*_i" type="pint" indexed="true" stored="true"/> | |||||
<dynamicField name="*_s" type="string" indexed="true" stored="true"/> | |||||
<dynamicField name="*_l" type="plong" indexed="true" stored="true"/> | |||||
<dynamicField name="*_t" type="text_general" indexed="true" stored="true"/> | |||||
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> | |||||
<dynamicField name="*_f" type="pfloat" indexed="true" stored="true"/> | |||||
<dynamicField name="*_d" type="pdouble" indexed="true" stored="true"/> | |||||
<dynamicField name="*_p" type="location" indexed="true" stored="true"/> | |||||
<dynamicField name="*_c" type="currency" indexed="true" stored="true"/> | |||||
<copyField source="content" dest="text_shingles"/> | |||||
<copyField source="*" dest="_text_"/> | |||||
<!-- ADDED BY SIMON BOWIE 2022-04-04 --> | |||||
<!-- Custom "year" field: the "content" value of every document is copied into it, and
     the field type below tokenizes that text with a regular expression, emitting only
     capture group 1 (four digits). The pattern looks for the literal "=D", then four
     whitespace characters each preceded by an optional run of non-whitespace, and
     finally the four digits. -->
<!-- NOTE(review): copyField presumably hands over the raw source text, so the stored
     value of "year" may be the whole "content" string while only the indexed terms hold
     the four-digit match; content with several matches would also index several terms,
     which may matter if this field is used for sorting. Confirm against the Solr docs. -->
<copyField source="content" dest="year"/> | |||||
<field name="year" type="year" indexed="true" stored="true"/> | |||||
<fieldType name="year" class="solr.TextField" positionIncrementGap="100"> | |||||
<analyzer> | |||||
<tokenizer class="solr.PatternTokenizerFactory" pattern="=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})" group="1" /> | |||||
</analyzer> | |||||
</fieldType> | |||||
<!-- END --> | |||||
</schema> |
# Licensed to the Apache Software Foundation (ASF) under one or more | |||||
# contributor license agreements. See the NOTICE file distributed with | |||||
# this work for additional information regarding copyright ownership. | |||||
# The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
# (the "License"); you may not use this file except in compliance with | |||||
# the License. You may obtain a copy of the License at | |||||
# | |||||
# http://www.apache.org/licenses/LICENSE-2.0 | |||||
# | |||||
# Unless required by applicable law or agreed to in writing, software | |||||
# distributed under the License is distributed on an "AS IS" BASIS, | |||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
# See the License for the specific language governing permissions and | |||||
# limitations under the License. |
# The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
# (the "License"); you may not use this file except in compliance with | |||||
# the License. You may obtain a copy of the License at | |||||
# | |||||
# http://www.apache.org/licenses/LICENSE-2.0 | |||||
# | |||||
# Unless required by applicable law or agreed to in writing, software | |||||
# distributed under the License is distributed on an "AS IS" BASIS, | |||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
# See the License for the specific language governing permissions and | |||||
# limitations under the License. | |||||
#----------------------------------------------------------------------- | |||||
#some test synonym mappings unlikely to appear in real input text | |||||
aaafoo => aaabar | |||||
bbbfoo => bbbfoo bbbbar | |||||
cccfoo => cccbar cccbaz | |||||
fooaaa,baraaa,bazaaa | |||||
# Some synonym groups specific to this example | |||||
GB,gib,gigabyte,gigabytes | |||||
MB,mib,megabyte,megabytes | |||||
Television, Televisions, TV, TVs | |||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming | |||||
#after us won't split it into two words. | |||||
# Synonym mappings can be used for spelling correction too | |||||
pixima => pixma | |||||
/**
 * Look up a Java class object from its fully-qualified name.
 *
 * The first attempt uses the Nashorn (Java 8) Java.type() API; when that
 * throws, the lookup falls back to the Rhino (Java 7) Packages namespace.
 *
 * @param {string} name fully-qualified Java class name
 * @returns {*} whatever the script engine yields for that class
 */
function get_class(name) {
  try {
    // Java8 Nashorn
    return eval("Java.type(name).class");
  } catch (nashornFailure) {
    // Java7 Rhino
    return eval("Packages."+name);
  }
}
/**
 * Hook called with an add command; enriches the document before it is indexed.
 *
 * 1. Logs the document id.
 * 2. Reduces the "content_type" value to a coarse "doc_type" (doc, html, image,
 *    presentation, spreadsheet, pdf, text) and stores the type and subtype in
 *    "content_type_type_s" and "content_type_subtype_s".
 * 3. Runs the "content" value through the index analyzer of the
 *    "text_email_url" field type and adds each token to a "<token type>_ss"
 *    field.
 *
 * @param cmd add command; cmd.solrDoc is the document being added
 */
function processAdd(cmd) {
  // Declared locally so the document does not leak into the script's global scope.
  var doc = cmd.solrDoc;  // org.apache.solr.common.SolrInputDocument
  var id = doc.getFieldValue("id");
  logger.info("update-script#processAdd: id=" + id);

  // The idea here is to use the file's content_type value to
  // simplify into user-friendly values, such that types of, say, image/jpeg and image/tiff
  // are in an "Images" facet
  var ct = doc.getFieldValue("content_type");
  if (ct) {
    // strip off semicolon onward
    var semicolon_index = ct.indexOf(';');
    if (semicolon_index != -1) {
      ct = ct.substring(0, semicolon_index);
    }

    // Split type/subtype. A value without a slash is kept whole as the type and
    // no subtype is recorded, instead of producing an empty type.
    var slash_index = ct.indexOf('/');
    var ct_type = slash_index == -1 ? ct : ct.substring(0, slash_index);
    var ct_subtype = slash_index == -1 ? null : ct.substring(slash_index + 1);

    var doc_type;
    // switch(true) selects the first case whose test evaluates to true.
    switch (true) {
      case /^application\/rtf/.test(ct) || /wordprocessing/.test(ct):
        doc_type = "doc";
        break;
      case /html/.test(ct):
        doc_type = "html";
        break;
      case /^image\/.*/.test(ct):
        doc_type = "image";
        break;
      case /presentation|powerpoint/.test(ct):
        doc_type = "presentation";
        break;
      case /spreadsheet|excel/.test(ct):
        doc_type = "spreadsheet";
        break;
      case /^application\/pdf/.test(ct):
        doc_type = "pdf";
        break;
      case /^text\/plain/.test(ct):
        doc_type = "text";
        break;
      default:
        break;
    }

    if (doc_type) { doc.setField("doc_type", doc_type); }
    doc.setField("content_type_type_s", ct_type);
    if (ct_subtype !== null) { doc.setField("content_type_subtype_s", ct_subtype); }
  }

  var content = doc.getFieldValue("content");
  if (!content) {
    return; //No content found, so we are done here
  }

  var analyzer =
       req.getCore().getLatestSchema()
       .getFieldTypeByName("text_email_url")
       .getIndexAnalyzer();

  var token_stream =
       analyzer.tokenStream("content", content);
  try {
    var term_att = token_stream.getAttribute(get_class("org.apache.lucene.analysis.tokenattributes.CharTermAttribute"));
    var type_att = token_stream.getAttribute(get_class("org.apache.lucene.analysis.tokenattributes.TypeAttribute"));
    token_stream.reset();
    while (token_stream.incrementToken()) {
      // Token types arrive wrapped in angle brackets; strip them to build the field name.
      doc.addField(type_att.type().replace(/\<|\>/g,'').toLowerCase()+"_ss", term_att.toString());
    }
    token_stream.end();
  } finally {
    // Always release the stream, even when analysis throws part-way through.
    token_stream.close();
  }
}
// The remaining hooks are intentionally empty: this script takes no action in them.

/** No-op. */
function processDelete(cmd) {}

/** No-op. */
function processMergeIndexes(cmd) {}

/** No-op. */
function processCommit(cmd) {}

/** No-op. */
function processRollback(cmd) {}

/** No-op. */
function finish() {}
## Search box, debug view of the parsed query, the active filter queries (as hidden
## form fields and as removable constraints) and the container for the result list.
<div id="query-box">
  <form id="query-form" action="#{url_for_home}" method="GET">
    $resource.find:
    <input type="text" id="q" name="q" style="width: 50%" value="$!esc.html($request.params.get('q'))"/>
    <input type="submit" value="$resource.submit"/>
    <div id="debug_query" class="debug">
      <span id="parsed_query">$esc.html($response.response.debug.parsedquery)</span>
    </div>
    <input type="hidden" name="type" value="#current_type"/>
    ## The locale field needs a name to be submitted; it previously had two value attributes.
    #if("#current_locale"!="")<input type="hidden" name="locale" value="#current_locale"/>#end
    #foreach($fq in $response.responseHeader.params.getAll("fq"))
      <input type="hidden" name="fq" id="allFQs" value="$esc.html($fq)"/>
    #end
  </form>
  <div id="constraints">
    #foreach($fq in $response.responseHeader.params.getAll("fq"))
      #set($previous_fq_count=$velocityCount - 1)
      #if($fq != '')
        ## The filter text comes from the request, so it is HTML-escaped before display.
        &gt; $esc.html($fq)<a href="#url_for_filters($response.responseHeader.params.fq.subList(0,$previous_fq_count))">x</a>
      #end
    #end
  </div>
</div>
<div id="browse_results">
  #parse("results.vm")
</div>
## intentionally empty | |||||
## Renders the values of one facet field as a list with id "tagcloud".
## Values are ordered by name through the sort tool; each item carries its count in
## data-weight and links to the URL produced by the url_for_facet_filter macro.
<div id="facet_$field.name"> | |||||
<span class="facet-field">$resource.facet.top_phrases</span><br/> | |||||
<ul id="tagcloud"> | |||||
#foreach($facet in $sort.sort($field.values,"name")) | |||||
<li data-weight="$math.mul($facet.count,1)"> | |||||
<a href="#url_for_facet_filter($field.name, $facet.name)">$facet.name</a> | |||||
</li> | |||||
#end | |||||
</ul> | |||||
</div> |
## Renders every facet field of the response that has at least one value.
## When a template named facet_ plus the field name plus .vm exists it is parsed;
## otherwise a generic list of filter links with counts is produced, showing
## "missing" for a facet whose name is null.
#if($response.facetFields.size() > 0) | |||||
#foreach($field in $response.facetFields) | |||||
#if($field.values.size() > 0) | |||||
#if($engine.resourceExists("facet_${field.name}.vm")) | |||||
#parse("facet_${field.name}.vm") | |||||
#else | |||||
<div id="facet_$field.name" class="facet_field"> | |||||
<span class="facet-field">#label("facet.${field.name}",$field.name)</span><br/> | |||||
<ul> | |||||
#foreach($facet in $field.values) | |||||
<li><a href="#url_for_facet_filter($field.name, $facet.name)">#if($facet.name!=$null)#label("${field.name}.${facet.name}","${field.name}.${facet.name}")#else<em>missing</em>#end</a> ($facet.count)</li> | |||||
#end | |||||
</ul> | |||||
</div> | |||||
#end | |||||
#end | |||||
#end ## end foreach field (the field.values check closes on the line above) | |||||
#end ## end if facetFields > 0 | |||||
## Page footer: a link to the admin UI of the current core, a toggle for all elements
## with the "debug" class, a link to the XML form of the current results, and a debug
## section (class "debug") that prints the HTML-escaped request and debug response.
<hr/> | |||||
<div> | |||||
<div id="admin"><a href="#url_root/index.html#/#{core_name}">Solr Admin</a></div> | |||||
<a href="#" onclick='jQuery(".debug").toggle(); return false;'>toggle debug mode</a> | |||||
<a href="#url_for_lens&wt=xml#if($debug)&debug=true#end">XML results</a> ## TODO: Add links for other formats, maybe dynamically? | |||||
</div> | |||||
<div> | |||||
<a href="http://lucene.apache.org/solr">Solr Home Page</a> | |||||
</div> | |||||
<div class="debug"> | |||||
<hr/> | |||||
Request: | |||||
<pre> | |||||
$esc.html($request) | |||||
</pre> | |||||
<hr/> | |||||
Debug: | |||||
<pre> | |||||
$esc.html($response.response.debug) | |||||
</pre> | |||||
</div> |
<title>Solr browse: #core_name</title> | |||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/> | |||||
<link rel="icon" type="image/x-icon" href="#{url_root}/img/favicon.ico"/> | |||||
<link rel="shortcut icon" type="image/x-icon" href="#{url_root}/img/favicon.ico"/> | |||||
<script type="text/javascript" src="#{url_root}/libs/jquery-3.4.1.min.js"></script> | |||||
<script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/js/jquery.tx3-tag-cloud.js&contentType=text/javascript"></script> | |||||
<script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/js/dropit.js&contentType=text/javascript"></script> | |||||
<script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/js/jquery.autocomplete.js&contentType=text/javascript"></script> | |||||
<script type="text/javascript">
  $(document).ready(function() {
    // Initial tag cloud and drop-down menus.
    $("#tagcloud").tx3TagCloud({
      multiplier: 1
    });

    $('.menu').dropit();

    // Rebuild the tag cloud whenever an AJAX request completes.
    $( document ).ajaxComplete(function() {
      $("#tagcloud").tx3TagCloud({
        multiplier: 5
      });
    });

    // Live results: reload the result list on every key release. The query text is
    // encoded with encodeURIComponent so that characters such as ampersand, plus,
    // equals and hash stay inside the q parameter instead of splitting the URL.
    $('\#q').keyup(function() {
      $('#browse_results').load('#{url_for_home}?#lensNoQ&v.layout.enabled=false&v.template=results&q='+encodeURIComponent($('\#q').val()));
    });

    // Suggestions: attach the autocompleter and the Enter-to-submit handler once,
    // rather than stacking a new instance and handler on every key release. This is
    // done last so that a failure here cannot prevent the handlers above from binding.
    $("\#q").autocomplete('#{url_for_solr}/suggest', {
      extraParams: {
        'suggest.q': function() { return $("\#q").val();},
        'suggest.build': 'true',
        'wt': 'json'
      }
    }).keydown(function(e) {
      if (e.keyCode === 13){
        $("#query-form").trigger('submit');
      }
    });
  });
</script>
<style>
  /* Page-level defaults */
  html {
    background-color: #F0F8FF;
  }
  body {
    font-family: Helvetica, Arial, sans-serif;
    font-size: 10pt;
  }
  #header {
    width: 100%;
    font-size: 20pt;
  }
  #header2 {
    margin-left:1200px;
  }
  #logo {
    width: 115px;
    margin: 0px 0px 0px 0px;
    border-style: none;
  }
  a {
    color: #305CB3;
  }
  a.hidden {
    display:none;
  }
  em {
    color: #FF833D;
  }
  .error {
    color: white;
    background-color: red;
    left: 210px;
    width:80%;
    position: relative;
  }

  /* Debug output stays hidden until toggled */
  .debug { display: none; font-size: 10pt}
  #debug_query {
    font-family: Helvetica, Arial, sans-serif;
    font-size: 10pt;
    font-weight: bold;
  }
  #parsed_query {
    /* "monospace" is the generic family keyword; "monospaced" is not one */
    font-family: Courier, Courier New, monospace;
    font-size: 10pt;
    font-weight: normal;
  }
  #admin {
    text-align: right;
    vertical-align: top;
  }

  /* Query form */
  #query-form {
    width: 90%;
  }
  #query-box {
    padding: 5px;
    margin: 5px;
    font-weight: normal;
    font-size: 24px;
    letter-spacing: 0.08em;
  }
  #constraints {
    margin: 10px;
  }

  /* Tabs */
  #tabs { }
  #tabs li { display: inline; font-size: 10px;}
  #tabs li a { border-radius: 20px; border: 2px solid #C1CDCD; padding: 10px;color: #42454a; background-color: #dedbde;}
  #tabs li a:hover { background-color: #f1f0ee; }
  #tabs li a.selected { color: #000; background-color: #f1f0ee; font-weight: bold; padding: 5px }
  #tabs li a.no_results { color: #000; background-color: #838B8B; font-style: italic; padding: 5px; pointer-events: none;
    cursor: default; text-decoration: none;}

  .pagination {
    width: 305px;
    border-radius: 25px;
    border: 2px solid #C1CDCD;
    padding: 20px;
    padding-left: 10%;
    background: #eee;
    margin-left: 190px;
    margin-top : 42px;
    padding-top: 5px;
    padding-bottom: 5px;
    text-align:left;
  }

  /* Result list */
  #results_list { width: 70%; }
  .result-document {
    border-radius: 25px;
    border: 2px solid #C1CDCD;
    padding: 10px;
    /* width: 800px; */
    /* height: 120px; */
    margin: 5px;
    /* margin-left: 60px; */
    /* margin-right: 210px; */
    /* margin-bottom: 15px; */
    transition: 1s ease;
  }
  .result-document:hover
  {
    -webkit-transform: scale(1.1);
    -ms-transform: scale(1.1);
    transform: scale(1.1);
    transition: 1s ease;
  }
  .result-document div {
    padding: 5px;
  }
  .result-title {
    width:60%;
  }
  .result-body {
    background: #ddd;
  }
  .result-document:nth-child(2n+1) {
    background-color: #FFFFFD;
  }

  /* Facets */
  #facets {
    margin: 5px;
    margin-top: 0px;
    padding: 5px;
    top: -20px;
    position: relative;
    float: right;
    width: 25%;
  }
  .facet-field {
    font-weight: bold;
  }
  #facets ul {
    list-style: none;
    margin: 0;
    margin-bottom: 5px;
    margin-top: 5px;
    padding-left: 10px;
  }
  #facets ul li {
    color: #999;
    padding: 2px;
  }
  div.facet_field {
    clear: left;
  }

  /* Tag cloud */
  ul.tx3-tag-cloud { }
  ul.tx3-tag-cloud li {
    display: block;
    float: left;
    list-style: none;
    margin-right: 4px;
  }
  ul.tx3-tag-cloud li a {
    display: block;
    text-decoration: none;
    color: #c9c9c9;
    padding: 3px 10px;
  }
  ul.tx3-tag-cloud li a:hover {
    color: #000000;
    -webkit-transition: color 250ms linear;
    -moz-transition: color 250ms linear;
    -o-transition: color 250ms linear;
    -ms-transition: color 250ms linear;
    transition: color 250ms linear;
  }

  /* Drop-down menus */
  .dropit {
    list-style: none;
    padding: 0;
    margin: 0;
  }
  .dropit .dropit-trigger { position: relative; }
  .dropit .dropit-submenu {
    position: absolute;
    top: 100%;
    left: 0; /* dropdown left or right */
    z-index: 1000;
    display: none;
    min-width: 150px;
    list-style: none;
    padding: 0;
    margin: 0;
  }
  .dropit .dropit-open .dropit-submenu { display: block; }

  /* autocomplete css (an HTML-style comment here became part of the next selector) */
  .ac_results {
    padding: 0px;
    border: 1px solid black;
    background-color: white;
    overflow: hidden;
    z-index: 99999;
  }
  .ac_results ul {
    width: 100%;
    list-style-position: outside;
    list-style: none;
    padding: 0;
    margin: 0;
  }
  .ac_results li {
    margin: 0px;
    padding: 2px 5px;
    cursor: default;
    display: block;
    font: menu;
    font-size: 12px;
    line-height: 16px;
    overflow: hidden;
  }
  .ac_loading {
    /* background: white url('˜indicator.gif') right center no-repeat; */
  }
  .ac_odd {
    background-color: #eee;
  }
  .ac_over {
    background-color: #0A246A;
    color: white;
  }
</style>
## Renders one search hit: file-type icon, title, document id, highlighted content
## snippets, and two debug-only sections (score explanation and all stored fields).
#set($docId = $doc.getFirstValue($request.schema.uniqueKeyField.name))
## Load Mime-Type List and Mapping
#parse('mime_type_lists.vm')
## Title: the title field when present, otherwise the part of the id after the last
## slash. Both variants are HTML-escaped because the value is printed into the page.
#if($doc.getFieldValue('title'))
  #set($title = $esc.html($doc.getFirstValue('title')))
#else
  #set($title = $esc.html("$doc.getFirstValue('id').substring($math.add(1,$doc.getFirstValue('id').lastIndexOf('/')))"))
#end
## Date
#if($doc.getFieldValue('attr_meta_creation_date'))
  #set($date = $esc.html($doc.getFirstValue('attr_meta_creation_date')))
#else
  #set($date = "No date found")
#end
## URL
#if($doc.getFieldValue('url'))
  #set($url = $doc.getFieldValue('url'))
#elseif($doc.getFieldValue('resourcename'))
  #set($url = "file:///$doc.getFirstValue('resourcename')")
#else
  #set($url = "$doc.getFieldValue('id')")
#end
## Sort out Mime-Type: look the content type up in the extension map, fall back to the
## extension of the resource name, then to the generic "file" icon.
#set($ct = $doc.getFirstValue('content_type').split(";").get(0))
#set($filename = $doc.getFirstValue('resourcename'))
#set($filetype = false)
#set($filetype = $mimeExtensionsMap.get($ct))
#if(!$filetype)
  #set($filetype = $filename.substring($filename.lastIndexOf(".")).substring(1))
#end
#if(!$filetype)
  #set($filetype = "file")
#end
#if(!$supportedMimeTypes.contains($filetype))
  #set($filetype = "file")
#end
<div class="result-document">
  <span class="result-title">
    <img src="#{url_root}/img/filetypes/${filetype}.png" align="center">
    <b>$title</b>
  </span>
  <div>
    id: $esc.html($docId) <br/>
  </div>
  ## Highlighted snippets, joined with " ... "; the HL_START/HL_END markers are turned
  ## into emphasis tags after the text itself has been escaped.
  #set($pad = "")
  #foreach($v in $response.response.highlighting.get($docId).get("content"))
    $pad$esc.html($v).replace("HL_START","<em>").replace("HL_END","</em>")
    #set($pad = " ... ")
  #end
</div>
<a href="#" class="debug" onclick='jQuery(this).next().toggle(); return false;'>toggle explain</a>
<pre style="display: none;">
$esc.html($response.getExplainMap().get($doc.getFirstValue('id')))
</pre>
<a href="#" class="debug" onclick='jQuery(this).next().toggle(); return false;'>show all fields</a>
<pre style="display:none;">
#foreach($fieldname in $doc.fieldNames)
<span>$fieldname :</span>
<span>#foreach($value in $doc.getFieldValues($fieldname))$esc.html($value)#end</span>
#end
</pre>
/* | |||||
* Dropit v1.1.0 | |||||
* http://dev7studios.com/dropit | |||||
* | |||||
* Copyright 2012, Dev7studios | |||||
* Free to use and abuse under the MIT license. | |||||
* http://www.opensource.org/licenses/mit-license.php | |||||
*/ | |||||
;(function($) {

    /**
     * Drop-down menu plugin entry point.
     *
     * Called with a method name it dispatches to that method; called with an
     * options object (or nothing) it runs init; any other argument is reported
     * through $.error.
     */
    $.fn.dropit = function(method) {

        var methods = {

            /**
             * Merge the options over the defaults, then wire up every matched element.
             * The merged settings are written to this.dropit.settings and read back
             * from $.fn.dropit.settings for each element.
             */
            init : function(options) {
                this.dropit.settings = $.extend({}, this.dropit.defaults, options);

                return this.each(function() {
                    var menu = $(this),
                        cfg = $.fn.dropit.settings,
                        // Selector for a trigger parent that contains a submenu.
                        parentWithSubmenu = cfg.triggerParentEl + ':has(' + cfg.submenuEl + ')';

                    // Closes every open menu on the page, firing the hide callbacks
                    // with the given context as "this".
                    function closeEveryOpenMenu(context) {
                        cfg.beforeHide.call(context);
                        $('.dropit-open').removeClass('dropit-open').find('.dropit-submenu').hide();
                        cfg.afterHide.call(context);
                    }

                    // Hide initial submenus
                    menu.addClass('dropit')
                        .find('>' + parentWithSubmenu).addClass('dropit-trigger')
                        .find(cfg.submenuEl).addClass('dropit-submenu').hide();

                    // Open on the configured action
                    menu.off(cfg.action).on(cfg.action, parentWithSubmenu + ' > ' + cfg.triggerEl, function() {
                        // In click mode a second click on an open menu closes it again
                        if (cfg.action == 'click' && $(this).parents(cfg.triggerParentEl).hasClass('dropit-open')) {
                            cfg.beforeHide.call(this);
                            $(this).parents(cfg.triggerParentEl).removeClass('dropit-open').find(cfg.submenuEl).hide();
                            cfg.afterHide.call(this);
                            return false;
                        }

                        // Hide open menus
                        closeEveryOpenMenu(this);

                        // Open this menu
                        cfg.beforeShow.call(this);
                        $(this).parents(cfg.triggerParentEl).addClass('dropit-open').find(cfg.submenuEl).show();
                        cfg.afterShow.call(this);

                        return false;
                    });

                    // Close if outside click
                    $(document).on('click', function() {
                        closeEveryOpenMenu(this);
                    });

                    // In hover mode, leaving an open entry closes it
                    if (cfg.action == 'mouseenter') {
                        menu.on('mouseleave', '.dropit-open', function() {
                            cfg.beforeHide.call(this);
                            $(this).removeClass('dropit-open').find(cfg.submenuEl).hide();
                            cfg.afterHide.call(this);
                        });
                    }

                    cfg.afterLoad.call(this);
                });
            }

        };

        if (methods[method]) {
            return methods[method].apply(this, Array.prototype.slice.call(arguments, 1));
        }
        if (typeof method === 'object' || !method) {
            return methods.init.apply(this, arguments);
        }
        $.error( 'Method "' + method + '" does not exist in dropit plugin!');

    };

    $.fn.dropit.defaults = {
        action: 'mouseenter', // The open action for the trigger
        submenuEl: 'ul', // The submenu element
        triggerEl: 'a', // The trigger element
        triggerParentEl: 'li', // The trigger parent element
        afterLoad: function(){}, // Triggers when plugin has loaded
        beforeShow: function(){}, // Triggers before submenu is shown
        afterShow: function(){}, // Triggers after submenu is shown
        beforeHide: function(){}, // Triggers before submenu is hidden
        afterHide: function(){} // Triggers after submenu is hidden
    };

    $.fn.dropit.settings = {};

})(jQuery);
/* | |||||
* Autocomplete - jQuery plugin 1.1pre | |||||
* | |||||
* Copyright (c) 2007 Dylan Verheul, Dan G. Switzer, Anjesh Tuladhar, Jörn Zaefferer | |||||
* | |||||
* Dual licensed under the MIT and GPL licenses: | |||||
* http://www.opensource.org/licenses/mit-license.php | |||||
* http://www.gnu.org/licenses/gpl.html | |||||
* | |||||
* Revision: Id: jquery.autocomplete.js 5785 2008-07-12 10:37:33Z joern.zaefferer $ | |||||
* | |||||
*/ | |||||
;(function($) { | |||||
$.fn.extend({
    /**
     * Attach an autocompleter to every matched element.
     *
     * @param urlOrData a string is used as the "url" setting; anything else
     *                  is used as the "data" setting
     * @param options   optional overrides applied on top of
     *                  $.Autocompleter.defaults and the derived values
     */
    autocomplete: function(urlOrData, options) {
        var remote = typeof urlOrData == "string";
        // Values derived from the arguments; explicit options still win over them.
        var derived = {
            url: remote ? urlOrData : null,
            data: remote ? null : urlOrData,
            delay: remote ? $.Autocompleter.defaults.delay : 10,
            max: options && !options.scroll ? 10 : 150
        };
        var settings = $.extend({}, $.Autocompleter.defaults, derived, options);

        // if highlight is set to false, replace it with a do-nothing function
        if (!settings.highlight) {
            settings.highlight = function(value) { return value; };
        }

        // if the formatMatch option is not specified, then use formatItem for backwards compatibility
        if (!settings.formatMatch) {
            settings.formatMatch = settings.formatItem;
        }

        return this.each(function() {
            new $.Autocompleter(this, settings);
        });
    },

    // Registers a handler for the "result" event.
    result: function(handler) {
        return this.bind("result", handler);
    },

    // Triggers the "search" event, passing the handler along as event data.
    search: function(handler) {
        return this.trigger("search", [handler]);
    },

    // Triggers the "flushCache" event.
    flushCache: function() {
        return this.trigger("flushCache");
    },

    // Triggers the "setOptions" event with the new options as event data.
    setOptions: function(options){
        return this.trigger("setOptions", [options]);
    },

    // Triggers the "unautocomplete" event.
    unautocomplete: function() {
        return this.trigger("unautocomplete");
    }
});
$.Autocompleter = function(input, options) { | |||||
var KEY = { | |||||
UP: 38, | |||||
DOWN: 40, | |||||
DEL: 46, | |||||
TAB: 9, | |||||
RETURN: 13, | |||||
ESC: 27, | |||||
COMMA: 188, | |||||
PAGEUP: 33, | |||||
PAGEDOWN: 34, | |||||
BACKSPACE: 8 | |||||
}; | |||||
// Create $ object for input element | |||||
var $input = $(input).attr("autocomplete", "off").addClass(options.inputClass); | |||||
var timeout; | |||||
var previousValue = ""; | |||||
var cache = $.Autocompleter.Cache(options); | |||||
var hasFocus = 0; | |||||
var lastKeyPressCode; | |||||
var config = { | |||||
mouseDownOnSelect: false | |||||
}; | |||||
var select = $.Autocompleter.Select(options, input, selectCurrent, config); | |||||
var blockSubmit; | |||||
// prevent form submit in opera when selecting with return key | |||||
$.browser.opera && $(input.form).bind("submit.autocomplete", function() { | |||||
if (blockSubmit) { | |||||
blockSubmit = false; | |||||
return false; | |||||
} | |||||
}); | |||||
// only opera doesn't trigger keydown multiple times while pressed, others don't work with keypress at all | |||||
$input.bind(($.browser.opera ? "keypress" : "keydown") + ".autocomplete", function(event) { | |||||
// track last key pressed | |||||
lastKeyPressCode = event.keyCode; | |||||
switch(event.keyCode) { | |||||
case KEY.UP: | |||||
event.preventDefault(); | |||||
if ( select.visible() ) { | |||||
select.prev(); | |||||
} else { | |||||
onChange(0, true); | |||||
} | |||||
break; | |||||
case KEY.DOWN: | |||||
event.preventDefault(); | |||||
if ( select.visible() ) { | |||||
select.next(); | |||||
} else { | |||||
onChange(0, true); | |||||
} | |||||
break; | |||||
case KEY.PAGEUP: | |||||
event.preventDefault(); | |||||
if ( select.visible() ) { | |||||
select.pageUp(); | |||||
} else { | |||||
onChange(0, true); | |||||
} | |||||
break; | |||||
case KEY.PAGEDOWN: | |||||
event.preventDefault(); | |||||
if ( select.visible() ) { | |||||
select.pageDown(); | |||||
} else { | |||||
onChange(0, true); | |||||
} | |||||
break; | |||||
// matches also semicolon | |||||
case options.multiple && $.trim(options.multipleSeparator) == "," && KEY.COMMA: | |||||
case KEY.TAB: | |||||
case KEY.RETURN: | |||||
if( selectCurrent() ) { | |||||
// stop default to prevent a form submit, Opera needs special handling | |||||
event.preventDefault(); | |||||
blockSubmit = true; | |||||
return false; | |||||
} | |||||
break; | |||||
case KEY.ESC: | |||||
select.hide(); | |||||
break; | |||||
default: | |||||
clearTimeout(timeout); | |||||
timeout = setTimeout(onChange, options.delay); | |||||
break; | |||||
} | |||||
}).focus(function(){ | |||||
// track whether the field has focus, we shouldn't process any | |||||
// results if the field no longer has focus | |||||
hasFocus++; | |||||
}).blur(function() { | |||||
hasFocus = 0; | |||||
if (!config.mouseDownOnSelect) { | |||||
hideResults(); | |||||
} | |||||
}).click(function() { | |||||
// show select when clicking in a focused field | |||||
if ( hasFocus++ > 1 && !select.visible() ) { | |||||
onChange(0, true); | |||||
} | |||||
}).bind("search", function() { | |||||
// TODO why not just specifying both arguments? | |||||
var fn = (arguments.length > 1) ? arguments[1] : null; | |||||
function findValueCallback(q, data) { | |||||
var result; | |||||
if( data && data.length ) { | |||||
for (var i=0; i < data.length; i++) { | |||||
if( data[i].result.toLowerCase() == q.toLowerCase() ) { | |||||
result = data[i]; | |||||
break; | |||||
} | |||||
} | |||||
} | |||||
if( typeof fn == "function" ) fn(result); | |||||
else $input.trigger("result", result && [result.data, result.value]); | |||||
} | |||||
$.each(trimWords($input.val()), function(i, value) { | |||||
request(value, findValueCallback, findValueCallback); | |||||
}); | |||||
}).bind("flushCache", function() { | |||||
cache.flush(); | |||||
}).bind("setOptions", function() { | |||||
$.extend(options, arguments[1]); | |||||
// if we've updated the data, repopulate | |||||
if ( "data" in arguments[1] ) | |||||
cache.populate(); | |||||
}).bind("unautocomplete", function() { | |||||
select.unbind(); | |||||
$input.unbind(); | |||||
$(input.form).unbind(".autocomplete"); | |||||
}); | |||||
/**
 * Commits the currently highlighted suggestion to the input field.
 *
 * In multiple mode the words already typed before the last one are kept and
 * the chosen result is appended, followed by the separator. Fires the
 * "result" event with the chosen row's data and value.
 *
 * @return {Boolean} false when nothing is highlighted, true otherwise
 */
function selectCurrent() {
	var chosen = select.selected();
	if (!chosen) {
		return false;
	}

	var text = chosen.result;
	// remembered before any prefixing so the next change check compares
	// against the bare result
	previousValue = text;

	if (options.multiple) {
		var typedWords = trimWords($input.val());
		if (typedWords.length > 1) {
			var kept = typedWords.slice(0, typedWords.length - 1).join(options.multipleSeparator);
			text = kept + options.multipleSeparator + text;
		}
		text += options.multipleSeparator;
	}

	$input.val(text);
	hideResultsNow();
	$input.trigger("result", [chosen.data, chosen.value]);
	return true;
}
/**
 * Reacts to a (possible) change of the input value and starts a lookup.
 *
 * @param crap          unused; present because the function is also used as a timer callback
 * @param skipPrevCheck when truthy, a lookup is started even if the value did not change
 */
function onChange(crap, skipPrevCheck) {
	// the delete key only closes the list
	if (lastKeyPressCode == KEY.DEL) {
		select.hide();
		return;
	}

	var typed = $input.val();
	if (!skipPrevCheck && typed == previousValue) {
		return;
	}
	previousValue = typed;

	var term = lastWord(typed);
	// written as a negation so the outcome matches ">=" for every operand
	if (!(term.length >= options.minChars)) {
		stopLoading();
		select.hide();
		return;
	}

	$input.addClass(options.loadingClass);
	if (!options.matchCase) {
		term = term.toLowerCase();
	}
	request(term, receiveData, hideResultsNow);
}
/**
 * Splits the input value on options.multipleSeparator and returns the
 * trimmed, non-empty words in order.
 *
 * The result is a dense array: blank segments are dropped instead of leaving
 * holes, so callers iterating it (e.g. the "search" handler) never receive
 * an undefined word.
 *
 * @param  {String} value raw input value
 * @return {Array}  [""] for an empty value, otherwise the non-blank words
 */
function trimWords(value) {
	if (!value) {
		return [""];
	}
	var words = [];
	$.each(value.split(options.multipleSeparator), function(i, word) {
		var trimmed = $.trim(word);
		if (trimmed) {
			words.push(trimmed);
		}
	});
	return words;
}
/**
 * Returns the word a lookup should be run for.
 *
 * Without options.multiple the value is returned untouched. In multiple mode
 * it is the last non-blank word; when the value contains no such word (for
 * example only whitespace) an empty string is returned so callers can safely
 * use string methods on the result.
 *
 * @param  {String} value raw input value
 * @return {String}
 */
function lastWord(value) {
	if (!options.multiple) {
		return value;
	}
	var words = trimWords(value);
	return words.length ? words[words.length - 1] : "";
}
/**
 * Completes the input box with the first match (assumed to be the best one)
 * and selects the appended part so the next keystroke replaces it.
 *
 * Nothing happens when options.autoFill is off, when the user has typed more
 * since the request was issued, or when the last key was backspace.
 *
 * @param {String} q      the term that was looked up
 * @param {String} sValue the first matching result
 */
function autoFill(q, sValue) {
	if (!options.autoFill) {
		return;
	}
	var typedWord = lastWord($input.val());
	if (typedWord.toLowerCase() != q.toLowerCase()) {
		return;
	}
	if (lastKeyPressCode == KEY.BACKSPACE) {
		return;
	}

	// keep the case the user has typed, append only the missing tail
	var tail = sValue.substring(lastWord(previousValue).length);
	$input.val($input.val() + tail);

	// select the portion not typed by the user
	var selectionStart = previousValue.length;
	$.Autocompleter.Selection(input, selectionStart, selectionStart + sValue.length);
}
/**
 * Schedules the suggestion list to be hidden after 200 ms, replacing any
 * timer that is still pending on the shared `timeout` handle.
 */
function hideResults() {
	var HIDE_DELAY_MS = 200;
	clearTimeout(timeout);
	timeout = setTimeout(hideResultsNow, HIDE_DELAY_MS);
}
/**
 * Hides the suggestion list immediately and cancels the pending timer.
 *
 * With options.mustMatch a search is run for the current value; when it
 * yields no result the unmatched text is removed (the last word in multiple
 * mode, the whole value otherwise). If the list was visible, the cursor is
 * placed at the end of the input.
 */
function hideResultsNow() {
	var wasVisible = select.visible();
	select.hide();
	clearTimeout(timeout);
	stopLoading();

	if (options.mustMatch) {
		// call search and run callback
		$input.search(function(match) {
			if (match) {
				return;
			}
			// no value found: clear what the user typed
			if (!options.multiple) {
				$input.val("");
				return;
			}
			var kept = trimWords($input.val()).slice(0, -1);
			var suffix = kept.length ? options.multipleSeparator : "";
			$input.val(kept.join(options.multipleSeparator) + suffix);
		});
	}

	if (wasVisible) {
		// position cursor at end of input field
		$.Autocompleter.Selection(input, input.value.length, input.value.length);
	}
}
/**
 * Success callback for a lookup: shows the rows when there are any and the
 * field still has focus, otherwise hides the list.
 *
 * @param {String} q    the term that was looked up
 * @param {Array}  data parsed result rows
 */
function receiveData(q, data) {
	var showable = data && data.length && hasFocus;
	if (!showable) {
		hideResultsNow();
		return;
	}
	stopLoading();
	select.display(data, q);
	autoFill(q, data[0].value);
	select.show();
}
/**
 * Looks up `term` and hands the rows to `success`, or calls `failure` when
 * no source is available.
 *
 * The cache lookup result is deliberately discarded, so every lookup goes to
 * options.url when one is configured. Responses are still added to the cache.
 *
 * @param {String}   term    the text to look up
 * @param {Function} success called as success(term, rows)
 * @param {Function} failure called as failure(term) when no url is configured
 */
function request(term, success, failure) {
	if (!options.matchCase) {
		term = term.toLowerCase();
	}

	var cached = cache.load(term);
	cached = null; // Avoid buggy cache and go to Solr every time

	// receive the cached data
	if (cached && cached.length) {
		success(term, cached);
		return;
	}

	var hasUrl = (typeof options.url == "string") && (options.url.length > 0);
	if (!hasUrl) {
		// empty the list so the [TAB] key cannot select the last successful match
		select.emptyList();
		failure(term);
		return;
	}

	// an AJAX url has been supplied, load the data from it
	var params = {
		timestamp: +new Date()
	};
	$.each(options.extraParams, function(key, param) {
		// function-valued params are evaluated at request time
		params[key] = typeof param == "function" ? param() : param;
	});

	$.ajax({
		// try to leverage ajaxQueue plugin to abort previous requests
		mode: "abort",
		// limit abortion to this input
		port: "autocomplete" + input.name,
		dataType: options.dataType,
		url: options.url,
		data: $.extend({
			q: lastWord(term),
			limit: options.max
		}, params),
		success: function(response) {
			var rows = options.parse && options.parse(response) || parse(response);
			cache.add(term, rows);
			success(term, rows);
		}
	});
}
/**
 * Default response parser: one row per non-blank line, columns separated
 * by "|".
 *
 * @param  {String} data raw response text
 * @return {Array}  objects with `data` (all columns), `value` (first column)
 *                  and `result` (options.formatResult output, or the first
 *                  column when that is missing or falsy)
 */
function parse(data) {
	var rows = [];
	$.each(data.split("\n"), function(index, line) {
		var text = $.trim(line);
		if (!text) {
			return;
		}
		var columns = text.split("|");
		rows.push({
			data: columns,
			value: columns[0],
			result: options.formatResult && options.formatResult(columns, columns[0]) || columns[0]
		});
	});
	return rows;
}
/**
 * Removes the loading indicator class (options.loadingClass) from the input.
 */
function stopLoading() {
	var loadingClass = options.loadingClass;
	$input.removeClass(loadingClass);
}
}; | |||||
/**
 * Default option values for the autocompleter.
 */
$.Autocompleter.defaults = {
	// --- CSS classes ---
	// presumably applied to the input element by code outside this section
	inputClass: "ac_input",
	// class of the generated results container
	resultsClass: "ac_results",
	// class set on the input while a lookup is running
	loadingClass: "ac_loading",

	// --- lookup behaviour ---
	// minimum length of the (last) word before a lookup starts
	minChars: 1,
	// delay passed to setTimeout before a keystroke triggers a lookup
	delay: 400,
	// when false, terms are lower-cased before lookup
	matchCase: false,
	// allow the cache to answer from the rows stored for a shorter prefix
	matchSubset: true,
	// false: match at the start only; truthy: match anywhere; "word": match at word starts
	matchContains: false,
	cacheLength: 10,
	// upper bound on rows requested and displayed
	max: 100,
	// when true, text without a matching result is removed once the list hides
	mustMatch: false,
	// extra request parameters; function values are called at request time
	extraParams: {},

	// --- presentation ---
	// highlight the first row as soon as the list is filled
	selectFirst: false,
	formatItem: function(row) { return row[0]; },
	// NOTE(review): the cache calls this unconditionally for local data, so a
	// caller presumably replaces the null before use -- confirm
	formatMatch: null,
	autoFill: false,
	// 0 means: use the width of the input
	width: 0,
	multiple: false,
	multipleSeparator: ", ",
	// wraps occurrences of the term in <strong>, skipping text inside tags and entities
	highlight: function(value, term) {
		var escapedTerm = term.replace(/([\^\$\(\)\[\]\{\}\*\.\+\?\|\\])/gi, "\\$1");
		var matcher = new RegExp("(?![^&;]+;)(?!<[^<>]*)(" + escapedTerm + ")(?![^<>]*>)(?![^&;]+;)", "gi");
		return value.replace(matcher, "<strong>$1</strong>");
	},
	scroll: true,
	scrollHeight: 180
};
/**
 * Result cache for the autocompleter.
 *
 * Stores rows per query string, can be pre-populated from options.data and
 * can answer a query from the rows cached for a shorter prefix.
 *
 * @param  {Object} options autocompleter options (read and, in populate(), modified)
 * @return {Object} { flush, add, populate, load }
 */
$.Autocompleter.Cache = function(options) {
	var data = {};
	var length = 0;

	// Escapes regular-expression metacharacters so user-typed text can be
	// embedded in a pattern literally.
	function escapeRegExp(text) {
		return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	}

	// Tells whether `sub` matches `s` according to options.matchContains:
	// falsy -> only at the start, "word" -> at a word boundary, other truthy -> anywhere.
	function matchSubset(s, sub) {
		if (!options.matchCase)
			s = s.toLowerCase();
		var i = s.indexOf(sub);
		if (options.matchContains == "word") {
			// the term comes straight from the input field, so it must be
			// escaped before it becomes part of a pattern
			i = s.toLowerCase().search("\\b" + escapeRegExp(sub.toLowerCase()));
		}
		if (i == -1) return false;
		return i == 0 || options.matchContains;
	}

	// Stores `value` under query `q`; the whole cache is flushed first once
	// it has grown beyond options.cacheLength.
	function add(q, value) {
		if (length > options.cacheLength) {
			flush();
		}
		if (!data[q]) {
			length++;
		}
		data[q] = value;
	}

	// Builds the cache from options.data, grouping rows by the first
	// character of their match value. Returns false when there is no local data.
	function populate() {
		if (!options.data) return false;
		// track the matches
		var stMatchSets = {},
			nullData = 0;

		// no url was specified, we need to adjust the cache length to make sure it fits the local data store
		if (!options.url) options.cacheLength = 1;

		// track all options for minChars = 0
		stMatchSets[""] = [];

		// loop through the array and create a lookup structure
		for (var i = 0, ol = options.data.length; i < ol; i++) {
			var rawValue = options.data[i];
			// if rawValue is a string, make an array otherwise just reference the array
			rawValue = (typeof rawValue == "string") ? [rawValue] : rawValue;

			var value = options.formatMatch(rawValue, i + 1, options.data.length);
			if (value === false)
				continue;

			var firstChar = value.charAt(0).toLowerCase();
			// if no lookup array for this character exists, create it now
			if (!stMatchSets[firstChar])
				stMatchSets[firstChar] = [];

			var row = {
				value: value,
				data: rawValue,
				result: options.formatResult && options.formatResult(rawValue) || value
			};

			// push the current match into the set list
			stMatchSets[firstChar].push(row);

			// keep track of minChars zero items
			if (nullData++ < options.max) {
				stMatchSets[""].push(row);
			}
		}

		// add the data items to the cache
		$.each(stMatchSets, function(key, rows) {
			// increase the cache size
			options.cacheLength++;
			// add to the cache
			add(key, rows);
		});
	}

	// populate any existing data
	setTimeout(populate, 25);

	// Drops everything that is cached.
	function flush() {
		data = {};
		length = 0;
	}

	return {
		flush: flush,
		add: add,
		populate: populate,
		// Returns the cached rows for `q`, or null when nothing usable is cached.
		load: function(q) {
			if (!options.cacheLength || !length)
				return null;

			/*
			 * if dealing w/local data and matchContains then we must make sure
			 * to loop through all the data collections looking for matches
			 */
			if (!options.url && options.matchContains) {
				// track all matches
				var allMatches = [];
				// loop through all the data grids for matches
				for (var k in data) {
					// don't search through the stMatchSets[""] (minChars: 0) cache
					// this prevents duplicates
					if (k.length > 0) {
						$.each(data[k], function(i, x) {
							// if we've got a match, add it to the array
							if (matchSubset(x.value, q)) {
								allMatches.push(x);
							}
						});
					}
				}
				return allMatches;
			}

			// if the exact item exists, use it
			if (data[q]) {
				return data[q];
			}

			if (options.matchSubset) {
				// try ever shorter prefixes of q and filter their rows
				for (var len = q.length - 1; len >= options.minChars; len--) {
					var prefixRows = data[q.substr(0, len)];
					if (prefixRows) {
						var subset = [];
						$.each(prefixRows, function(i, x) {
							if (matchSubset(x.value, q)) {
								subset[subset.length] = x;
							}
						});
						return subset;
					}
				}
			}
			return null;
		}
	};
};
/**
 * The drop-down list of suggestions.
 *
 * @param {Object}   options autocompleter options
 * @param {Element}  input   the input element the list belongs to
 * @param {Function} select  called when the user clicks a list item
 * @param {Object}   config  shared state; mouseDownOnSelect is set while a mouse button is held on the list
 * @return {Object} the list API (display, next, prev, pageUp, pageDown, hide,
 *                  visible, current, show, selected, emptyList, unbind)
 */
$.Autocompleter.Select = function (options, input, select, config) {
	var CLASSES = {
		ACTIVE: "ac_over"
	};
	// number of rows a page-up / page-down key moves the highlight
	var PAGE_STEP = 8;

	var listItems,
		active = -1,
		data,
		term = "",
		needsInit = true,
		element,
		list;

	// Creates the results container and list on first use.
	function init() {
		if (!needsInit)
			return;
		element = $("<div/>")
			.hide()
			.addClass(options.resultsClass)
			.css("position", "absolute")
			.appendTo(document.body);

		list = $("<ul/>").appendTo(element).mouseover(function(event) {
			var item = target(event);
			if (item.nodeName && item.nodeName.toUpperCase() == 'LI') {
				active = $("li", list).removeClass(CLASSES.ACTIVE).index(item);
				$(item).addClass(CLASSES.ACTIVE);
			}
		}).click(function(event) {
			$(target(event)).addClass(CLASSES.ACTIVE);
			select();
			// TODO provide option to avoid setting focus again after selection? useful for cleanup-on-focus
			input.focus();
			return false;
		}).mousedown(function() {
			config.mouseDownOnSelect = true;
		}).mouseup(function() {
			config.mouseDownOnSelect = false;
		});

		if (options.width > 0)
			element.css("width", options.width);

		needsInit = false;
	}

	// Returns the LI the event happened in, or [] when there is none.
	function target(event) {
		var node = event.target;
		while (node && node.tagName != "LI")
			node = node.parentNode;
		// more fun with IE, sometimes event.target is empty, just ignore it then
		if (!node)
			return [];
		return node;
	}

	// Moves the highlight by `step` rows and scrolls it into view.
	function moveSelect(step) {
		// nothing to move through (list not built yet, or every row was filtered out)
		if (!listItems || !listItems.length)
			return;
		listItems.slice(active, active + 1).removeClass(CLASSES.ACTIVE);
		movePosition(step);
		var activeItem = listItems.slice(active, active + 1).addClass(CLASSES.ACTIVE);
		if (options.scroll) {
			var offset = 0;
			listItems.slice(0, active).each(function() {
				offset += this.offsetHeight;
			});
			if ((offset + activeItem[0].offsetHeight - list.scrollTop()) > list[0].clientHeight) {
				list.scrollTop(offset + activeItem[0].offsetHeight - list.innerHeight());
			} else if (offset < list.scrollTop()) {
				list.scrollTop(offset);
			}
		}
	}

	// Advances `active` by `step`, wrapping around at both ends.
	function movePosition(step) {
		active += step;
		if (active < 0) {
			active = listItems.length - 1;
		} else if (active >= listItems.length) {
			active = 0;
		}
	}

	// Caps the number of rows to options.max (when set).
	function limitNumberOfItems(available) {
		return options.max && options.max < available
			? options.max
			: available;
	}

	// Rebuilds the LI elements from `data`.
	function fillList() {
		list.empty();
		var max = limitNumberOfItems(data.length);
		for (var i = 0; i < max; i++) {
			if (!data[i])
				continue;
			var formatted = options.formatItem(data[i].data, i + 1, max, data[i].value, term);
			if (formatted === false)
				continue;
			var li = $("<li/>").html(options.highlight(formatted, term)).addClass(i % 2 == 0 ? "ac_even" : "ac_odd").appendTo(list)[0];
			$.data(li, "ac_data", data[i]);
		}
		listItems = list.find("li");
		if (options.selectFirst) {
			listItems.slice(0, 1).addClass(CLASSES.ACTIVE);
			active = 0;
		}
		// apply bgiframe if available
		if ($.fn.bgiframe)
			list.bgiframe();
	}

	return {
		display: function(d, q) {
			init();
			data = d;
			term = q;
			fillList();
		},
		next: function() {
			moveSelect(1);
		},
		prev: function() {
			moveSelect(-1);
		},
		pageUp: function() {
			if (active != 0 && active - PAGE_STEP < 0) {
				// less than a page left above: stop at the first row
				moveSelect(-active);
			} else {
				moveSelect(-PAGE_STEP);
			}
		},
		pageDown: function() {
			var last = listItems.length - 1;
			// less than a full page left below: stop at the last row
			// (">" against the index of the last row, so landing exactly one
			// past the end no longer wraps to the top)
			if (active != last && active + PAGE_STEP > last) {
				moveSelect(last - active);
			} else {
				moveSelect(PAGE_STEP);
			}
		},
		hide: function() {
			element && element.hide();
			listItems && listItems.removeClass(CLASSES.ACTIVE);
			active = -1;
		},
		visible: function() {
			return element && element.is(":visible");
		},
		current: function() {
			return this.visible() && (listItems.filter("." + CLASSES.ACTIVE)[0] || options.selectFirst && listItems[0]);
		},
		show: function() {
			var offset = $(input).offset();
			element.css({
				width: typeof options.width == "string" || options.width > 0 ? options.width : $(input).width(),
				top: offset.top + input.offsetHeight,
				left: offset.left
			}).show();
			if (options.scroll) {
				list.scrollTop(0);
				list.css({
					maxHeight: options.scrollHeight,
					overflow: 'auto'
				});

				// legacy IE without max-height support; $.browser does not
				// exist in newer jQuery versions, so test for it first
				if ($.browser && $.browser.msie && typeof document.body.style.maxHeight === "undefined") {
					var listHeight = 0;
					listItems.each(function() {
						listHeight += this.offsetHeight;
					});
					var scrollbarsVisible = listHeight > options.scrollHeight;
					list.css('height', scrollbarsVisible ? options.scrollHeight : listHeight);
					if (!scrollbarsVisible) {
						// IE doesn't recalculate width when scrollbar disappears
						listItems.width(list.width() - parseInt(listItems.css("padding-left"), 10) - parseInt(listItems.css("padding-right"), 10));
					}
				}
			}
		},
		// Returns the data of the highlighted row (and clears the highlight),
		// or a falsy value when no row is highlighted.
		selected: function() {
			var selected = listItems && listItems.filter("." + CLASSES.ACTIVE).removeClass(CLASSES.ACTIVE);
			return selected && selected.length && $.data(selected[0], "ac_data");
		},
		emptyList: function() {
			list && list.empty();
		},
		unbind: function() {
			element && element.remove();
		}
	};
};
/**
 * Selects the characters from `start` to `end` in a text field and gives
 * the field focus.
 *
 * Uses whichever selection API the field offers: createTextRange,
 * setSelectionRange, or the selectionStart/selectionEnd properties.
 *
 * @param {Element} field text input element
 * @param {Number}  start index of the first selected character
 * @param {Number}  end   index just past the last selected character
 */
$.Autocompleter.Selection = function(field, start, end) {
	if (field.createTextRange) {
		var selRange = field.createTextRange();
		selRange.collapse(true);
		selRange.moveStart("character", start);
		selRange.moveEnd("character", end);
		selRange.select();
	} else if (field.setSelectionRange) {
		field.setSelectionRange(start, end);
	} else if (typeof field.selectionStart != "undefined") {
		// tested with typeof: a caret at position 0 is a valid, falsy value
		field.selectionStart = start;
		field.selectionEnd = end;
	}
	field.focus();
};
})(jQuery); |
/* | |||||
* ---------------------------------------------------------------------------- | |||||
* "THE BEER-WARE LICENSE" (Revision 42): | |||||
* Tuxes3 wrote this file. As long as you retain this notice you | |||||
* can do whatever you want with this stuff. If we meet some day, and you think | |||||
* this stuff is worth it, you can buy me a beer in return Tuxes3 | |||||
* ---------------------------------------------------------------------------- | |||||
*/ | |||||
(function($)
{
	var settings;

	/**
	 * jQuery plugin: turns the matched list(s) into a tag cloud by scaling
	 * each <li> according to its "data-weight" attribute.
	 *
	 * @param  {Object} options optional; `multiplier` (default 1) scales the
	 *                          size added to the heaviest tag, in em
	 * @return {jQuery} the matched set, for chaining
	 */
	$.fn.tx3TagCloud = function(options)
	{
		//
		// DEFAULT SETTINGS
		//
		settings = $.extend({
			multiplier : 1
		}, options);
		main(this);
		return this;
	};

	function main(element)
	{
		// adding style attr
		element.addClass("tx3-tag-cloud");
		addListElementFontSize(element);
	}

	/**
	 * calculates the font size on each li element
	 * according to their data-weight attribute
	 *
	 * The lightest tag gets 1em, the heaviest 1em + multiplier; elements
	 * without a numeric weight are reported and left unstyled.
	 */
	function addListElementFontSize(element)
	{
		var items = element.find("li");
		var hDataWeight = -9007199254740992;
		var lDataWeight = 9007199254740992;

		// first pass: find the weight range
		$.each(items, function(){
			var cDataWeight = getDataWeight(this);
			// parseInt yields NaN (never undefined) for a missing attribute
			if (isNaN(cDataWeight))
			{
				logWarning("No \"data-weight\" attribut defined on <li> element");
			}
			else
			{
				hDataWeight = cDataWeight > hDataWeight ? cDataWeight : hDataWeight;
				lDataWeight = cDataWeight < lDataWeight ? cDataWeight : lDataWeight;
			}
		});

		// second pass: size every weighted element relative to that range
		var range = hDataWeight - lDataWeight;
		$.each(items, function(){
			var dataWeight = getDataWeight(this);
			if (isNaN(dataWeight))
			{
				return;
			}
			// all weights equal: avoid 0/0 and keep the base size
			var percent = range === 0 ? 0 : Math.abs((dataWeight - lDataWeight) / range);
			$(this).css('font-size', (1 + (percent * settings['multiplier'])) + "em");
		});
	}

	// Reads the element's data-weight attribute as a base-10 integer (NaN when absent).
	function getDataWeight(element)
	{
		return parseInt($(element).attr("data-weight"), 10);
	}

	function logWarning(message)
	{
		console.log("[WARNING] " + Date.now() + " : " + message);
	}
}(jQuery));
## Page layout template: wraps the rendered page body ($content) with the
## shared head, a header with logo and locale menu, and the footer.
<html> | |||||
<head> | |||||
#parse("head.vm") | |||||
</head> | |||||
<body> | |||||
## Header: logo linking to the home URL, followed by the localized tagline.
<div id="header"> | |||||
<a href="#url_for_home"><img src="#{url_root}/img/solr.svg" id="logo" title="Solr"/></a> $resource.powered_file_search | |||||
</div> | |||||
## Locale menu: one entry per language; the entry for the locale that is
## currently active gets class "hidden". The empty locale stands for English.
<div id="header2" onclick="javascript:locale_select()"> | |||||
<ul class="menu"> | |||||
<li> | |||||
<a href="#"><img src="#{url_for_solr}/admin/file?file=/velocity/img/globe_256.png&contentType=image/png" id="locale_pic" title="locale_select" width="30px" height="27px"/></a> | |||||
<ul> | |||||
<li><a href="#url_for_locale('fr_FR')" #if("#current_locale"=="fr_FR")class="hidden"#end> | |||||
<img src="#{url_for_solr}/admin/file?file=/velocity/img/france_640.png&contentType=image/png" id="french_flag" width="40px" height="40px"/>Français</a></li> | |||||
<li><a href="#url_for_locale('de_DE')" #if("#current_locale"=="de_DE")class="hidden"#end> | |||||
<img src="#{url_for_solr}/admin/file?file=/velocity/img/germany_640.png&contentType=image/png" id="german_flag" width="40px" height="40px"/>Deutsch</a></li> | |||||
<li><a href="#url_for_locale('')" #if("#current_locale"=="")class="hidden"#end> | |||||
<img src="#{url_for_solr}/admin/file?file=/velocity/img/english_640.png&contentType=image/png" id="english_flag" width="40px" height="40px"/>English</a></li> | |||||
</ul> | |||||
</li> | |||||
</ul> | |||||
</div> | |||||
## Body: when the response carries an error code, show the code and message
## instead of the page content.
#if($response.response.error.code) | |||||
<div class="error"> | |||||
<h1>ERROR $response.response.error.code</h1> | |||||
$response.response.error.msg | |||||
</div> | |||||
#else | |||||
<div id="content"> | |||||
$content | |||||
</div> | |||||
#end | |||||
<div id="footer"> | |||||
#parse("footer.vm") | |||||
</div> | |||||
</body> | |||||
</html> |
## "Lens" macros: each one rebuilds the query string of the current request
## while leaving one parameter out, so a link can append its own value for it.
## Emits "?" followed by the fq filters (when the response header lists any)
## and the sort parameters of the request; #fqs and #sort are defined elsewhere.
#macro(lensFilterSortOnly)?#if($response.responseHeader.params.getAll("fq").size() > 0)&#fqs($response.responseHeader.params.getAll("fq"))#end#sort($request.params.getParams('sort'))#end | |||||
## Lens without the query: filters/sort, the current type and, when set, the current locale.
#macro(lensNoQ)#lensFilterSortOnly&type=#current_type#if("#current_locale"!="")&locale=#current_locale#end#end | |||||
## Lens without the type: filters/sort, the query (#q) and, when set, the current locale.
#macro(lensNoType)#lensFilterSortOnly#q#if("#current_locale"!="")&locale=#current_locale#end#end | |||||
## Lens without the locale: filters/sort, the query (#q) and the current type.
#macro(lensNoLocale)#lensFilterSortOnly#q&type=#current_type#end | |||||
## lens modified for example/files - to use fq from responseHeader rather than request, and #debug removed too as it is built into browse params now, also added type to lens | |||||
#macro(lens)#lensNoQ#q#end | |||||
## Macros defined custom for the "files" example | |||||
## URL of the current search restricted to the given document type.
#macro(url_for_type $type)#url_for_home#lensNoType&type=$type#end | |||||
## The "type" parameter of the current request, or "all" when none was given.
#macro(current_type)#if($response.responseHeader.params.type)${response.responseHeader.params.type}#{else}all#end#end | |||||
## URL of the current search in the given locale (no locale parameter for ""), keeping the current page start.
#macro(url_for_locale $locale)#url_for_home#lensNoLocale#if($locale!="")&locale=$locale#end&start=$page.start#end | |||||
## The "locale" parameter of the current request; renders as an empty string when absent.
#macro(current_locale)$!{response.responseHeader.params.locale}#end | |||||
## Usage: #label(resource_key[, default_value]) - resource_key is used as label if no default value specified and no resource exists | |||||
#macro(label $key $default)#if($resource.get($key).exists)${resource.get($key)}#else#if($default)$default#else${key}#end#end#end |
#** | |||||
* Define some Mime-Types, short and long form | |||||
*# | |||||
## MIME type to extension map, used for detecting the file type | |||||
## and showing the proper icon. | |||||
## The list of types matches the icons in /solr/img/filetypes | |||||
## Short MimeType Names: a single semicolon-separated string | |||||
## Was called $supportedtypes | |||||
#set($supportedMimeTypes = "7z;ai;aiff;asc;audio;bin;bz2;c;cfc;cfm;chm;class;conf;cpp;cs;css;csv;deb;divx;doc;dot;eml;enc;file;gif;gz;hlp;htm;html;image;iso;jar;java;jpeg;jpg;js;lua;m;mm;mov;mp3;mpg;odc;odf;odg;odi;odp;ods;odt;ogg;pdf;pgp;php;pl;png;ppt;ps;py;ram;rar;rb;rm;rpm;rtf;sig;sql;swf;sxc;sxd;sxi;sxw;tar;tex;tgz;txt;vcf;video;vsd;wav;wma;wmv;xls;xml;xpi;xvid;zip") | |||||
## Long Form: maps full MIME type headers to our short names | |||||
## Was called $extMap | |||||
#set( $mimeExtensionsMap = { | |||||
"application/x-7z-compressed": "7z", | |||||
"application/postscript": "ai", | |||||
"application/pgp-signature": "asc", | |||||
"application/octet-stream": "bin", | |||||
"application/x-bzip2": "bz2", | |||||
"text/x-c": "c", | |||||
"application/vnd.ms-htmlhelp": "chm", | |||||
"application/java-vm": "class", | |||||
"text/css": "css", | |||||
"text/csv": "csv", | |||||
"application/x-debian-package": "deb", | |||||
"application/msword": "doc", | |||||
"message/rfc822": "eml", | |||||
"image/gif": "gif", | |||||
"application/winhlp": "hlp", | |||||
"text/html": "html", | |||||
"application/java-archive": "jar", | |||||
"text/x-java-source": "java", | |||||
"image/jpeg": "jpeg", | |||||
"application/javascript": "js", | |||||
"application/vnd.oasis.opendocument.chart": "odc", | |||||
"application/vnd.oasis.opendocument.formula": "odf", | |||||
"application/vnd.oasis.opendocument.graphics": "odg", | |||||
"application/vnd.oasis.opendocument.image": "odi", | |||||
"application/vnd.oasis.opendocument.presentation": "odp", | |||||
"application/vnd.oasis.opendocument.spreadsheet": "ods", | |||||
"application/vnd.oasis.opendocument.text": "odt", | |||||
"application/pdf": "pdf", | |||||
"application/pgp-encrypted": "pgp", | |||||
"image/png": "png", | |||||
"application/vnd.ms-powerpoint": "ppt", | |||||
"audio/x-pn-realaudio": "ram", | |||||
"application/x-rar-compressed": "rar", | |||||
"application/vnd.rn-realmedia": "rm", | |||||
"application/rtf": "rtf", | |||||
"application/x-shockwave-flash": "swf", | |||||
"application/vnd.sun.xml.calc": "sxc", | |||||
"application/vnd.sun.xml.draw": "sxd", | |||||
"application/vnd.sun.xml.impress": "sxi", | |||||
"application/vnd.sun.xml.writer": "sxw", | |||||
"application/x-tar": "tar", | |||||
"application/x-tex": "tex", | |||||
"text/plain": "txt", | |||||
"text/x-vcard": "vcf", | |||||
"application/vnd.visio": "vsd", | |||||
"audio/x-wav": "wav", | |||||
"audio/x-ms-wma": "wma", | |||||
"video/x-ms-wmv": "wmv", | |||||
"application/vnd.ms-excel": "xls", | |||||
"application/xml": "xml", | |||||
"application/x-xpinstall": "xpi", | |||||
"application/zip": "zip" | |||||
}) |
## Search page body: the facet column, then the result list framed by a
## pagination summary above and pagination links below.
<div id="facets"> | |||||
#parse("facets.vm") | |||||
</div> | |||||
<div id="results_list"> | |||||
## Top summary: number of results, localized text with the query time
## (QTime from the response header) inserted, and "page x of y".
<div class="pagination"> | |||||
<span class="results-found">$page.results_found</span> $resource.results_found_in.insert(${response.responseHeader.QTime}) | |||||
$resource.page_of.insert($page.current_page_number,$page.page_count) | |||||
</div> | |||||
#parse("results_list.vm") | |||||
## Bottom summary with previous/next page links around it.
<div class="pagination"> | |||||
#link_to_previous_page | |||||
<span class="results-found">$page.results_found</span> $resource.results_found. | |||||
$resource.page_of.insert($page.current_page_number,$page.page_count) | |||||
#link_to_next_page | |||||
</div> | |||||
</div> |
## Document-type tabs: an "all" tab followed by one tab per doc_type facet
## value, each showing its count; the tab matching the current type gets
## class "selected".
<ul id="tabs"> | |||||
<li><a href="#url_for_type('all')" #if("#current_type"=="all")class="selected"#end>$resource.type.all ($response.response.facet_counts.facet_queries.all_types)</a></li> | |||||
#foreach($type in $response.response.facet_counts.facet_fields.doc_type) | |||||
#if($type.key) | |||||
## NOTE(review): both #if blocks on the next line emit a class attribute, so a
## type that has a zero count and is also the current type would get two - confirm.
<li><a href="#url_for_type($type.key)" #if($type.value=="0")class="no_results"#end #if("#current_type"==$type.key)class="selected"#end> #label("type.${type.key}.label", $type.key) ($type.value)</a></li> | |||||
#else | |||||
## Facet entry without a key: shown as "unknown", and only when it has results.
#if($type.value > 0) | |||||
<li><a href="#url_for_type('unknown')" #if("#current_type"=="unknown")class="selected"#end>$resource.type.unknown ($type.value)</a></li> | |||||
#end | |||||
#end | |||||
#end | |||||
</ul> | |||||
## Result hits: hit.vm is parsed once per document in the response.
<div id="results"> | |||||
#foreach($doc in $response.results) | |||||
#parse("hit.vm") | |||||
#end | |||||
</div> |
Import() | Import() | ||||
{ | { | ||||
docker exec -it solr solr create_core -c $core | |||||
docker exec -it solr solr create_core -c $core -d custom | |||||
docker exec -ti --user=solr solr bash -c "cp -r /opt/solr/example/files/conf/* /var/solr/data/$core/conf/" | |||||
#docker exec -ti --user=solr solr bash -c "cp -r /opt/solr/example/files/conf/* /var/solr/data/$core/conf/" | |||||
docker restart solr | docker restart solr | ||||
exit;; | exit;; | ||||
z) # index all | z) # index all | ||||
core="all" | core="all" | ||||
location="data/2018 (10381)" | |||||
location="data/pop_rtfs" | |||||
Import | Import | ||||
exit;; | exit;; | ||||
a) # index ACTIVE folder | a) # index ACTIVE folder |