@@ -26,6 +26,7 @@ services: | |||
- '8983:8983' | |||
volumes: | |||
- solrdata:/var/solr | |||
- ./solr_config:/opt/solr/server/solr/configsets/custom | |||
volumes: | |||
solrdata: |
@@ -44,6 +44,14 @@ include '../ops_api.php' | |||
<br><br> | |||
Year: | |||
<?php | |||
echo $result['year']; | |||
?> | |||
<br><br> | |||
EPO publication: | |||
<a href=<?php echo $result['epo_publication_url']; ?>> |
@@ -44,6 +44,11 @@ $cores = array("active", "expanding", "invisible", "multispecies", "surviving"); | |||
<option value="multispecies">multi-species</option> | |||
<option value="surviving">surviving</option> | |||
</select> | |||
sort by: | |||
<select name="sort" id="sort"> | |||
<option value="relevance">relevance</option> | |||
<option value="year">year</option> | |||
</select> | |||
<input type="submit" id="submit" value="search"> | |||
</form> | |||
</div> |
@@ -26,7 +26,7 @@ include '../solr.php'; | |||
<?php | |||
$search_results = solr_search($_POST["search"], $_POST["searchopt"]); | |||
$search_results = solr_search($_POST["search"], $_POST["searchopt"], $_POST["sort"]); | |||
if(is_array($search_results)): | |||
@@ -43,6 +43,14 @@ include '../solr.php'; | |||
<br><br> | |||
Year: | |||
<?php | |||
echo $result['year']; | |||
?> | |||
<br><br> | |||
EPO publication: | |||
<a href=<?php echo $result['epo_publication_url']; ?>> | |||
@@ -72,7 +80,7 @@ include '../solr.php'; | |||
<br><br> | |||
<?php | |||
if ($result['abstract']): | |||
if (isset($result['abstract'])): | |||
?> | |||
Abstract: |
@@ -1,9 +1,15 @@ | |||
<?php | |||
function solr_search($search, $core){ | |||
function solr_search($search, $core, $sort){ | |||
// Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html | |||
$solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json'; | |||
if ($sort == 'relevance'){ | |||
// Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html | |||
$solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json'; | |||
} | |||
else{ | |||
// Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html | |||
$solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json&sort=' . $sort . '%20asc'; | |||
} | |||
// Perform Curl request on the Solr API | |||
$ch = curl_init(); | |||
@@ -100,6 +106,11 @@ function parse_result($id, $input){ | |||
elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract)) { | |||
$output['abstract'] = $abstract[1]; | |||
} | |||
// Search for the year in the content element and display it | |||
if (preg_match('/=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})/', $input, $year)){ | |||
$output['year'] = $year[1]; | |||
} | |||
return $output; | |||
} | |||
@@ -0,0 +1,67 @@ | |||
<?xml version="1.0" ?> | |||
<!-- | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
--> | |||
<!-- Example exchange rates file for CurrencyField type named "currency" in example schema --> | |||
<currencyConfig version="1.0"> | |||
<rates> | |||
<!-- Updated from http://www.exchangerate.com/ at 2011-09-27 --> | |||
<rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" /> | |||
<rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" /> | |||
<rate from="USD" to="EUR" rate="0.743676" comment="European Euro" /> | |||
<rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" /> | |||
<rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" /> | |||
<rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" /> | |||
<rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" /> | |||
<rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" /> | |||
<rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" /> | |||
<rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" /> | |||
<rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" /> | |||
<rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" /> | |||
<rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" /> | |||
<rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" /> | |||
<rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" /> | |||
<rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" /> | |||
<rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" /> | |||
<rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" /> | |||
<rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" /> | |||
<rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" /> | |||
<rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" /> | |||
<rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" /> | |||
<rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" /> | |||
<rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" /> | |||
<rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" /> | |||
<rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" /> | |||
<rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" /> | |||
<rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" /> | |||
<rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" /> | |||
<rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" /> | |||
<rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" /> | |||
<rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" /> | |||
<rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" /> | |||
<rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" /> | |||
<rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" /> | |||
<rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" /> | |||
<rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" /> | |||
<!-- Cross-rates for some common currencies --> | |||
<rate from="EUR" to="GBP" rate="0.869914" /> | |||
<rate from="EUR" to="NOK" rate="7.800095" /> | |||
<rate from="GBP" to="NOK" rate="8.966508" /> | |||
</rates> | |||
</currencyConfig> |
@@ -0,0 +1,42 @@ | |||
<?xml version="1.0" encoding="UTF-8" ?> | |||
<!-- | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
--> | |||
<!-- If this file is found in the config directory, it will only be | |||
loaded once at startup. If it is found in Solr's data | |||
directory, it will be re-loaded every commit. | |||
See http://wiki.apache.org/solr/QueryElevationComponent for more info | |||
--> | |||
<elevate> | |||
<!-- Query elevation examples | |||
<query text="foo bar"> | |||
<doc id="1" /> | |||
<doc id="2" /> | |||
<doc id="3" /> | |||
</query> | |||
for use with techproducts example | |||
<query text="ipod"> | |||
<doc id="MA147LL/A" /> put the actual ipod at the top | |||
<doc id="IW-02" exclude="true" /> exclude this cable | |||
</query> | |||
--> | |||
</elevate> |
@@ -0,0 +1,2 @@ | |||
<URL> | |||
<EMAIL> |
@@ -0,0 +1,8 @@ | |||
# Set of Catalan contractions for ElisionFilter | |||
# TODO: load this as a resource from the analyzer and sync it in build.xml | |||
d | |||
l | |||
m | |||
n | |||
s | |||
t |
@@ -0,0 +1,15 @@ | |||
# Set of French contractions for ElisionFilter | |||
# TODO: load this as a resource from the analyzer and sync it in build.xml | |||
l | |||
m | |||
t | |||
qu | |||
n | |||
s | |||
j | |||
d | |||
c | |||
jusqu | |||
quoiqu | |||
lorsqu | |||
puisqu |
@@ -0,0 +1,5 @@ | |||
# Set of Irish contractions for ElisionFilter | |||
# TODO: load this as a resource from the analyzer and sync it in build.xml | |||
d | |||
m | |||
b |
@@ -0,0 +1,23 @@ | |||
# Set of Italian contractions for ElisionFilter | |||
# TODO: load this as a resource from the analyzer and sync it in build.xml | |||
c | |||
l | |||
all | |||
dall | |||
dell | |||
nell | |||
sull | |||
coll | |||
pell | |||
gl | |||
agl | |||
dagl | |||
degl | |||
negl | |||
sugl | |||
un | |||
m | |||
t | |||
s | |||
v | |||
d |
@@ -0,0 +1,5 @@ | |||
# Set of Irish hyphenations for StopFilter | |||
# TODO: load this as a resource from the analyzer and sync it in build.xml | |||
h | |||
n | |||
t |
@@ -0,0 +1,6 @@ | |||
# Set of overrides for the Dutch stemmer | |||
# TODO: load this as a resource from the analyzer and sync it in build.xml | |||
fiets fiets | |||
bromfiets bromfiets | |||
ei eier | |||
kind kinder |
@@ -0,0 +1,420 @@ | |||
# | |||
# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. | |||
# | |||
# Any token whose part-of-speech tag exactly matches one of those defined in this | |||
# file is removed from the token stream. | |||
# | |||
# Set your own stoptags by uncommenting the lines below. Note that comments are | |||
# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, | |||
# etc. that can be useful for building your own stoptag set. | |||
# | |||
# The entire possible tagset is provided below for convenience. | |||
# | |||
##### | |||
# noun: unclassified nouns | |||
#名詞 | |||
# | |||
# noun-common: Common nouns or nouns where the sub-classification is undefined | |||
#名詞-一般 | |||
# | |||
# noun-proper: Proper nouns where the sub-classification is undefined | |||
#名詞-固有名詞 | |||
# | |||
# noun-proper-misc: miscellaneous proper nouns | |||
#名詞-固有名詞-一般 | |||
# | |||
# noun-proper-person: Personal names where the sub-classification is undefined | |||
#名詞-固有名詞-人名 | |||
# | |||
# noun-proper-person-misc: names that cannot be divided into surname and | |||
# given name; foreign names; names where the surname or given name is unknown. | |||
# e.g. お市の方 | |||
#名詞-固有名詞-人名-一般 | |||
# | |||
# noun-proper-person-surname: Mainly Japanese surnames. | |||
# e.g. 山田 | |||
#名詞-固有名詞-人名-姓 | |||
# | |||
# noun-proper-person-given_name: Mainly Japanese given names. | |||
# e.g. 太郎 | |||
#名詞-固有名詞-人名-名 | |||
# | |||
# noun-proper-organization: Names representing organizations. | |||
# e.g. 通産省, NHK | |||
#名詞-固有名詞-組織 | |||
# | |||
# noun-proper-place: Place names where the sub-classification is undefined | |||
#名詞-固有名詞-地域 | |||
# | |||
# noun-proper-place-misc: Place names excluding countries. | |||
# e.g. アジア, バルセロナ, 京都 | |||
#名詞-固有名詞-地域-一般 | |||
# | |||
# noun-proper-place-country: Country names. | |||
# e.g. 日本, オーストラリア | |||
#名詞-固有名詞-地域-国 | |||
# | |||
# noun-pronoun: Pronouns where the sub-classification is undefined | |||
#名詞-代名詞 | |||
# | |||
# noun-pronoun-misc: miscellaneous pronouns: | |||
# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ | |||
#名詞-代名詞-一般 | |||
# | |||
# noun-pronoun-contraction: Spoken language contraction made by combining a | |||
# pronoun and the particle 'wa'. | |||
# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ | |||
#名詞-代名詞-縮約 | |||
# | |||
# noun-adverbial: Temporal nouns such as names of days or months that behave | |||
# like adverbs. Nouns that represent amount or ratios and can be used adverbially, | |||
# e.g. 金曜, 一月, 午後, 少量 | |||
#名詞-副詞可能 | |||
# | |||
# noun-verbal: Nouns that take arguments with case and can appear followed by | |||
# 'suru' and related verbs (する, できる, なさる, くださる) | |||
# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り | |||
#名詞-サ変接続 | |||
# | |||
# noun-adjective-base: The base form of adjectives, words that appear before な ("na") | |||
# e.g. 健康, 安易, 駄目, だめ | |||
#名詞-形容動詞語幹 | |||
# | |||
# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. | |||
# e.g. 0, 1, 2, 何, 数, 幾 | |||
#名詞-数 | |||
# | |||
# noun-affix: noun affixes where the sub-classification is undefined | |||
#名詞-非自立 | |||
# | |||
# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that | |||
# attach to the base form of inflectional words, words that cannot be classified | |||
# into any of the other categories below. This category includes indefinite nouns. | |||
# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, | |||
# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, | |||
# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, | |||
# わり, 割り, 割, ん-口語/, もん-口語/ | |||
#名詞-非自立-一般 | |||
# | |||
# noun-affix-adverbial: noun affixes that can behave as adverbs. | |||
# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, | |||
# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, | |||
# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, | |||
# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, | |||
# 儘, 侭, みぎり, 矢先 | |||
#名詞-非自立-副詞可能 | |||
# | |||
# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars | |||
# with the stem よう(だ) ("you(da)"). | |||
# e.g. よう, やう, 様 (よう) | |||
#名詞-非自立-助動詞語幹 | |||
# | |||
# noun-affix-adjective-base: noun affixes that can connect to the indeclinable | |||
# connection form な (aux "da"). | |||
# e.g. みたい, ふう | |||
#名詞-非自立-形容動詞語幹 | |||
# | |||
# noun-special: special nouns where the sub-classification is undefined. | |||
#名詞-特殊 | |||
# | |||
# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is | |||
# treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the base | |||
# form of inflectional words. | |||
# e.g. そう | |||
#名詞-特殊-助動詞語幹 | |||
# | |||
# noun-suffix: noun suffixes where the sub-classification is undefined. | |||
#名詞-接尾 | |||
# | |||
# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect | |||
# to ガル or タイ and can combine into compound nouns, words that cannot be classified into | |||
# any of the other categories below. In general, this category is more inclusive than | |||
# 接尾語 ("suffix") and is usually the last element in a compound noun. | |||
# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, | |||
# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 | |||
#名詞-接尾-一般 | |||
# | |||
# noun-suffix-person: Suffixes that form nouns and attach to person names more often | |||
# than other nouns. | |||
# e.g. 君, 様, 著 | |||
#名詞-接尾-人名 | |||
# | |||
# noun-suffix-place: Suffixes that form nouns and attach to place names more often | |||
# than other nouns. | |||
# e.g. 町, 市, 県 | |||
#名詞-接尾-地域 | |||
# | |||
# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that | |||
# can appear before スル ("suru"). | |||
# e.g. 化, 視, 分け, 入り, 落ち, 買い | |||
#名詞-接尾-サ変接続 | |||
# | |||
# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, | |||
# is treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the | |||
# conjunctive form of inflectional words. | |||
# e.g. そう | |||
#名詞-接尾-助動詞語幹 | |||
# | |||
# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive | |||
# form of inflectional words and appear before the copula だ ("da"). | |||
# e.g. 的, げ, がち | |||
#名詞-接尾-形容動詞語幹 | |||
# | |||
# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. | |||
# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) | |||
#名詞-接尾-副詞可能 | |||
# | |||
# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category | |||
# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach | |||
# to numbers. | |||
# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 | |||
#名詞-接尾-助数詞 | |||
# | |||
# noun-suffix-special: Special suffixes that mainly attach to inflecting words. | |||
# e.g. (楽し) さ, (考え) 方 | |||
#名詞-接尾-特殊 | |||
# | |||
# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words | |||
# together. | |||
# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) | |||
#名詞-接続詞的 | |||
# | |||
# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are | |||
# semantically verb-like. | |||
# e.g. ごらん, ご覧, 御覧, 頂戴 | |||
#名詞-動詞非自立的 | |||
# | |||
# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, | |||
# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") | |||
# is いわく ("iwaku"). | |||
#名詞-引用文字列 | |||
# | |||
# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and | |||
# behave like an adjective. | |||
# e.g. 申し訳, 仕方, とんでも, 違い | |||
#名詞-ナイ形容詞語幹 | |||
# | |||
##### | |||
# prefix: unclassified prefixes | |||
#接頭詞 | |||
# | |||
# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) | |||
# excluding numerical expressions. | |||
# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) | |||
#接頭詞-名詞接続 | |||
# | |||
# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb | |||
# in conjunctive form followed by なる/なさる/くださる. | |||
# e.g. お (読みなさい), お (座り) | |||
#接頭詞-動詞接続 | |||
# | |||
# prefix-adjectival: Prefixes that attach to adjectives. | |||
# e.g. お (寒いですねえ), バカ (でかい) | |||
#接頭詞-形容詞接続 | |||
# | |||
# prefix-numerical: Prefixes that attach to numerical expressions. | |||
# e.g. 約, およそ, 毎時 | |||
#接頭詞-数接続 | |||
# | |||
##### | |||
# verb: unclassified verbs | |||
#動詞 | |||
# | |||
# verb-main: | |||
#動詞-自立 | |||
# | |||
# verb-auxiliary: | |||
#動詞-非自立 | |||
# | |||
# verb-suffix: | |||
#動詞-接尾 | |||
# | |||
##### | |||
# adjective: unclassified adjectives | |||
#形容詞 | |||
# | |||
# adjective-main: | |||
#形容詞-自立 | |||
# | |||
# adjective-auxiliary: | |||
#形容詞-非自立 | |||
# | |||
# adjective-suffix: | |||
#形容詞-接尾 | |||
# | |||
##### | |||
# adverb: unclassified adverbs | |||
#副詞 | |||
# | |||
# adverb-misc: Words that can be segmented into one unit and where adnominal | |||
# modification is not possible. | |||
# e.g. あいかわらず, 多分 | |||
#副詞-一般 | |||
# | |||
# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, | |||
# な, する, だ, etc. | |||
# e.g. こんなに, そんなに, あんなに, なにか, なんでも | |||
#副詞-助詞類接続 | |||
# | |||
##### | |||
# adnominal: Words that only have noun-modifying forms. | |||
# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, | |||
# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, | |||
# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き | |||
#連体詞 | |||
# | |||
##### | |||
# conjunction: Conjunctions that can occur independently. | |||
# e.g. が, けれども, そして, じゃあ, それどころか | |||
接続詞 | |||
# | |||
##### | |||
# particle: unclassified particles. | |||
助詞 | |||
# | |||
# particle-case: case particles where the subclassification is undefined. | |||
助詞-格助詞 | |||
# | |||
# particle-case-misc: Case particles. | |||
# e.g. から, が, で, と, に, へ, より, を, の, にて | |||
助詞-格助詞-一般 | |||
# | |||
# particle-case-quote: the "to" that appears after nouns, a person’s speech, | |||
# quotation marks, expressions of decisions from a meeting, reasons, judgements, | |||
# conjectures, etc. | |||
# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) | |||
助詞-格助詞-引用 | |||
# | |||
# particle-case-compound: Compounds of particles and verbs that mainly behave | |||
# like case particles. | |||
# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, | |||
# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, | |||
# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, | |||
# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, | |||
# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, | |||
# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, | |||
# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, | |||
# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ | |||
助詞-格助詞-連語 | |||
# | |||
# particle-conjunctive: | |||
# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, | |||
# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, | |||
# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ | |||
助詞-接続助詞 | |||
# | |||
# particle-dependency: | |||
# e.g. こそ, さえ, しか, すら, は, も, ぞ | |||
助詞-係助詞 | |||
# | |||
# particle-adverbial: | |||
# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, | |||
# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, | |||
# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, | |||
# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, | |||
# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) | |||
助詞-副助詞 | |||
# | |||
# particle-interjective: particles with interjective grammatical roles. | |||
# e.g. (松島) や | |||
助詞-間投助詞 | |||
# | |||
# particle-coordinate: | |||
# e.g. と, たり, だの, だり, とか, なり, や, やら | |||
助詞-並立助詞 | |||
# | |||
# particle-final: | |||
# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, | |||
# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ | |||
助詞-終助詞 | |||
# | |||
# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is | |||
# adverbial, conjunctive, or sentence final. For example: | |||
# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 | |||
# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 | |||
# 「(祈りが届いたせい) か (, 試験に合格した.)」 | |||
# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 | |||
# e.g. か | |||
助詞-副助詞/並立助詞/終助詞 | |||
# | |||
# particle-adnominalizer: The "no" that attaches to nouns and modifies | |||
# non-inflectional words. | |||
助詞-連体化 | |||
# | |||
# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs | |||
# that are giongo, giseigo, or gitaigo. | |||
# e.g. に, と | |||
助詞-副詞化 | |||
# | |||
# particle-special: A particle that does not fit into one of the above classifications. | |||
# This includes particles that are used in Tanka, Haiku, and other poetry. | |||
# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) | |||
助詞-特殊 | |||
# | |||
##### | |||
# auxiliary-verb: | |||
助動詞 | |||
# | |||
##### | |||
# interjection: Greetings and other exclamations. | |||
# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, | |||
# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい | |||
#感動詞 | |||
# | |||
##### | |||
# symbol: unclassified Symbols. | |||
記号 | |||
# | |||
# symbol-misc: A general symbol not in one of the categories below. | |||
# e.g. [○◎@$〒→+] | |||
記号-一般 | |||
# | |||
# symbol-comma: Commas | |||
# e.g. [,、] | |||
記号-読点 | |||
# | |||
# symbol-period: Periods and full stops. | |||
# e.g. [..。] | |||
記号-句点 | |||
# | |||
# symbol-space: Full-width whitespace. | |||
記号-空白 | |||
# | |||
# symbol-open_bracket: | |||
# e.g. [({‘“『【] | |||
記号-括弧開 | |||
# | |||
# symbol-close_bracket: | |||
# e.g. [)}’”』」】] | |||
記号-括弧閉 | |||
# | |||
# symbol-alphabetic: | |||
#記号-アルファベット | |||
# | |||
##### | |||
# other: unclassified other | |||
#その他 | |||
# | |||
# other-interjection: Words that are hard to classify as noun-suffixes or | |||
# sentence-final particles. | |||
# e.g. (だ)ァ | |||
その他-間投 | |||
# | |||
##### | |||
# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. | |||
# e.g. あの, うんと, えと | |||
フィラー | |||
# | |||
##### | |||
# non-verbal: non-verbal sound. | |||
非言語音 | |||
# | |||
##### | |||
# fragment: | |||
#語断片 | |||
# | |||
##### | |||
# unknown: unknown part of speech. | |||
#未知語 | |||
# | |||
##### End of file |
@@ -0,0 +1,125 @@ | |||
# This file was created by Jacques Savoy and is distributed under the BSD license. | |||
# See http://members.unine.ch/jacques.savoy/clef/index.html. | |||
# Also see http://www.opensource.org/licenses/bsd-license.html | |||
# Cleaned on October 11, 2009 (not normalized, so use before normalization) | |||
# This means that when modifying this list, you might need to add some | |||
# redundant entries, for example containing forms with both أ and ا | |||
من | |||
ومن | |||
منها | |||
منه | |||
في | |||
وفي | |||
فيها | |||
فيه | |||
و | |||
ف | |||
ثم | |||
او | |||
أو | |||
ب | |||
بها | |||
به | |||
ا | |||
أ | |||
اى | |||
اي | |||
أي | |||
أى | |||
لا | |||
ولا | |||
الا | |||
ألا | |||
إلا | |||
لكن | |||
ما | |||
وما | |||
كما | |||
فما | |||
عن | |||
مع | |||
اذا | |||
إذا | |||
ان | |||
أن | |||
إن | |||
انها | |||
أنها | |||
إنها | |||
انه | |||
أنه | |||
إنه | |||
بان | |||
بأن | |||
فان | |||
فأن | |||
وان | |||
وأن | |||
وإن | |||
التى | |||
التي | |||
الذى | |||
الذي | |||
الذين | |||
الى | |||
الي | |||
إلى | |||
إلي | |||
على | |||
عليها | |||
عليه | |||
اما | |||
أما | |||
إما | |||
ايضا | |||
أيضا | |||
كل | |||
وكل | |||
لم | |||
ولم | |||
لن | |||
ولن | |||
هى | |||
هي | |||
هو | |||
وهى | |||
وهي | |||
وهو | |||
فهى | |||
فهي | |||
فهو | |||
انت | |||
أنت | |||
لك | |||
لها | |||
له | |||
هذه | |||
هذا | |||
تلك | |||
ذلك | |||
هناك | |||
كانت | |||
كان | |||
يكون | |||
تكون | |||
وكانت | |||
وكان | |||
غير | |||
بعض | |||
قد | |||
نحو | |||
بين | |||
بينما | |||
منذ | |||
ضمن | |||
حيث | |||
الان | |||
الآن | |||
خلال | |||
بعد | |||
قبل | |||
حتى | |||
عند | |||
عندما | |||
لدى | |||
جميع |
@@ -0,0 +1,193 @@ | |||
# This file was created by Jacques Savoy and is distributed under the BSD license. | |||
# See http://members.unine.ch/jacques.savoy/clef/index.html. | |||
# Also see http://www.opensource.org/licenses/bsd-license.html | |||
а | |||
аз | |||
ако | |||
ала | |||
бе | |||
без | |||
беше | |||
би | |||
бил | |||
била | |||
били | |||
било | |||
близо | |||
бъдат | |||
бъде | |||
бяха | |||
в | |||
вас | |||
ваш | |||
ваша | |||
вероятно | |||
вече | |||
взема | |||
ви | |||
вие | |||
винаги | |||
все | |||
всеки | |||
всички | |||
всичко | |||
всяка | |||
във | |||
въпреки | |||
върху | |||
г | |||
ги | |||
главно | |||
го | |||
д | |||
да | |||
дали | |||
до | |||
докато | |||
докога | |||
дори | |||
досега | |||
доста | |||
е | |||
едва | |||
един | |||
ето | |||
за | |||
зад | |||
заедно | |||
заради | |||
засега | |||
затова | |||
защо | |||
защото | |||
и | |||
из | |||
или | |||
им | |||
има | |||
имат | |||
иска | |||
й | |||
каза | |||
как | |||
каква | |||
какво | |||
както | |||
какъв | |||
като | |||
кога | |||
когато | |||
което | |||
които | |||
кой | |||
който | |||
колко | |||
която | |||
къде | |||
където | |||
към | |||
ли | |||
м | |||
ме | |||
между | |||
мен | |||
ми | |||
мнозина | |||
мога | |||
могат | |||
може | |||
моля | |||
момента | |||
му | |||
н | |||
на | |||
над | |||
назад | |||
най | |||
направи | |||
напред | |||
например | |||
нас | |||
не | |||
него | |||
нея | |||
ни | |||
ние | |||
никой | |||
нито | |||
но | |||
някои | |||
някой | |||
няма | |||
обаче | |||
около | |||
освен | |||
особено | |||
от | |||
отгоре | |||
отново | |||
още | |||
пак | |||
по | |||
повече | |||
повечето | |||
под | |||
поне | |||
поради | |||
после | |||
почти | |||
прави | |||
пред | |||
преди | |||
през | |||
при | |||
пък | |||
първо | |||
с | |||
са | |||
само | |||
се | |||
сега | |||
си | |||
скоро | |||
след | |||
сме | |||
според | |||
сред | |||
срещу | |||
сте | |||
съм | |||
със | |||
също | |||
т | |||
тази | |||
така | |||
такива | |||
такъв | |||
там | |||
твой | |||
те | |||
тези | |||
ти | |||
тн | |||
то | |||
това | |||
тогава | |||
този | |||
той | |||
толкова | |||
точно | |||
трябва | |||
тук | |||
тъй | |||
тя | |||
тях | |||
у | |||
харесва | |||
ч | |||
че | |||
често | |||
чрез | |||
ще | |||
щом | |||
я |
@@ -0,0 +1,220 @@ | |||
# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) | |||
a | |||
abans | |||
ací | |||
ah | |||
així | |||
això | |||
al | |||
als | |||
aleshores | |||
algun | |||
alguna | |||
algunes | |||
alguns | |||
alhora | |||
allà | |||
allí | |||
allò | |||
altra | |||
altre | |||
altres | |||
amb | |||
ambdós | |||
ambdues | |||
apa | |||
aquell | |||
aquella | |||
aquelles | |||
aquells | |||
aquest | |||
aquesta | |||
aquestes | |||
aquests | |||
aquí | |||
baix | |||
cada | |||
cadascú | |||
cadascuna | |||
cadascunes | |||
cadascuns | |||
com | |||
contra | |||
d'un | |||
d'una | |||
d'unes | |||
d'uns | |||
dalt | |||
de | |||
del | |||
dels | |||
des | |||
després | |||
dins | |||
dintre | |||
donat | |||
doncs | |||
durant | |||
e | |||
eh | |||
el | |||
els | |||
em | |||
en | |||
encara | |||
ens | |||
entre | |||
érem | |||
eren | |||
éreu | |||
es | |||
és | |||
esta | |||
està | |||
estàvem | |||
estaven | |||
estàveu | |||
esteu | |||
et | |||
etc | |||
ets | |||
fins | |||
fora | |||
gairebé | |||
ha | |||
han | |||
has | |||
havia | |||
he | |||
hem | |||
heu | |||
hi | |||
ho | |||
i | |||
igual | |||
iguals | |||
ja | |||
l'hi | |||
la | |||
les | |||
li | |||
li'n | |||
llavors | |||
m'he | |||
ma | |||
mal | |||
malgrat | |||
mateix | |||
mateixa | |||
mateixes | |||
mateixos | |||
me | |||
mentre | |||
més | |||
meu | |||
meus | |||
meva | |||
meves | |||
molt | |||
molta | |||
moltes | |||
molts | |||
mon | |||
mons | |||
n'he | |||
n'hi | |||
ne | |||
ni | |||
no | |||
nogensmenys | |||
només | |||
nosaltres | |||
nostra | |||
nostre | |||
nostres | |||
o | |||
oh | |||
oi | |||
on | |||
pas | |||
pel | |||
pels | |||
per | |||
però | |||
perquè | |||
poc | |||
poca | |||
pocs | |||
poques | |||
potser | |||
propi | |||
qual | |||
quals | |||
quan | |||
quant | |||
que | |||
què | |||
quelcom | |||
qui | |||
quin | |||
quina | |||
quines | |||
quins | |||
s'ha | |||
s'han | |||
sa | |||
semblant | |||
semblants | |||
ses | |||
seu | |||
seus | |||
seva | |||
seva | |||
seves | |||
si | |||
sobre | |||
sobretot | |||
sóc | |||
solament | |||
sols | |||
son | |||
són | |||
sons | |||
sota | |||
sou | |||
t'ha | |||
t'han | |||
t'he | |||
ta | |||
tal | |||
també | |||
tampoc | |||
tan | |||
tant | |||
tanta | |||
tantes | |||
teu | |||
teus | |||
teva | |||
teves | |||
ton | |||
tons | |||
tot | |||
tota | |||
totes | |||
tots | |||
un | |||
una | |||
unes | |||
uns | |||
us | |||
va | |||
vaig | |||
vam | |||
van | |||
vas | |||
veu | |||
vosaltres | |||
vostra | |||
vostre | |||
vostres |
@@ -0,0 +1,172 @@ | |||
a | |||
s | |||
k | |||
o | |||
i | |||
u | |||
v | |||
z | |||
dnes | |||
cz | |||
tímto | |||
budeš | |||
budem | |||
byli | |||
jseš | |||
můj | |||
svým | |||
ta | |||
tomto | |||
tohle | |||
tuto | |||
tyto | |||
jej | |||
zda | |||
proč | |||
máte | |||
tato | |||
kam | |||
tohoto | |||
kdo | |||
kteří | |||
mi | |||
nám | |||
tom | |||
tomuto | |||
mít | |||
nic | |||
proto | |||
kterou | |||
byla | |||
toho | |||
protože | |||
asi | |||
ho | |||
naši | |||
napište | |||
re | |||
což | |||
tím | |||
takže | |||
svých | |||
její | |||
svými | |||
jste | |||
aj | |||
tu | |||
tedy | |||
teto | |||
bylo | |||
kde | |||
ke | |||
pravé | |||
ji | |||
nad | |||
nejsou | |||
či | |||
pod | |||
téma | |||
mezi | |||
přes | |||
ty | |||
pak | |||
vám | |||
ani | |||
když | |||
však | |||
neg | |||
jsem | |||
tento | |||
článku | |||
články | |||
aby | |||
jsme | |||
před | |||
pta | |||
jejich | |||
byl | |||
ještě | |||
až | |||
bez | |||
také | |||
pouze | |||
první | |||
vaše | |||
která | |||
nás | |||
nový | |||
tipy | |||
pokud | |||
může | |||
strana | |||
jeho | |||
své | |||
jiné | |||
zprávy | |||
nové | |||
není | |||
vás | |||
jen | |||
podle | |||
zde | |||
už | |||
být | |||
více | |||
bude | |||
již | |||
než | |||
který | |||
by | |||
které | |||
co | |||
nebo | |||
ten | |||
tak | |||
má | |||
při | |||
od | |||
po | |||
jsou | |||
jak | |||
další | |||
ale | |||
si | |||
se | |||
ve | |||
to | |||
jako | |||
za | |||
zpět | |||
ze | |||
do | |||
pro | |||
je | |||
na | |||
atd | |||
atp | |||
jakmile | |||
přičemž | |||
já | |||
on | |||
ona | |||
ono | |||
oni | |||
ony | |||
my | |||
vy | |||
jí | |||
ji | |||
mě | |||
mne | |||
jemu | |||
tomu | |||
těm | |||
těmu | |||
němu | |||
němuž | |||
jehož | |||
jíž | |||
jelikož | |||
jež | |||
jakož | |||
načež |
@@ -0,0 +1,110 @@ | |||
| From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| A Danish stop word list. Comments begin with vertical bar. Each stop | |||
| word is at the start of a line. | |||
| This is a ranked list (commonest to rarest) of stopwords derived from | |||
| a large text sample. | |||
og | and | |||
i | in | |||
jeg | I | |||
det | that (dem. pronoun)/it (pers. pronoun) | |||
at | that (in front of a sentence)/to (with infinitive) | |||
en | a/an | |||
den | it (pers. pronoun)/that (dem. pronoun) | |||
til | to/at/for/until/against/by/of/into, more | |||
er | present tense of "to be" | |||
som | who, as | |||
på | on/upon/in/on/at/to/after/of/with/for, on | |||
de | they | |||
med | with/by/in, along | |||
han | he | |||
af | of/by/from/off/for/in/with/on, off | |||
for | at/for/to/from/by/of/ago, in front/before, because | |||
ikke | not | |||
der | who/which, there/those | |||
var | past tense of "to be" | |||
mig | me/myself | |||
sig | oneself/himself/herself/itself/themselves | |||
men | but | |||
et | a/an/one, one (number), someone/somebody/one | |||
har | present tense of "to have" | |||
om | round/about/for/in/a, about/around/down, if | |||
vi | we | |||
min | my | |||
havde | past tense of "to have" | |||
ham | him | |||
hun | she | |||
nu | now | |||
over | over/above/across/by/beyond/past/on/about, over/past | |||
da | then, when/as/since | |||
fra | from/off/since, off, since | |||
du | you | |||
ud | out | |||
sin | his/her/its/one's | |||
dem | them | |||
os | us/ourselves | |||
op | up | |||
man | you/one | |||
hans | his | |||
hvor | where | |||
eller | or | |||
hvad | what | |||
skal | must/shall etc. | |||
selv | myself/yourself/herself/ourselves etc., even | |||
her | here | |||
alle | all/everyone/everybody etc. | |||
vil | will (verb) | |||
blev | past tense of "to stay/to remain/to get/to become" | |||
kunne | could | |||
ind | in | |||
når | when | |||
være | infinitive of "to be" | |||
dog | however/yet/after all | |||
noget | something | |||
ville | would | |||
jo | you know/you see (adv), yes | |||
deres | their/theirs | |||
efter | after/behind/according to/for/by/from, later/afterwards | |||
ned | down | |||
skulle | should | |||
denne | this | |||
end | than | |||
dette | this | |||
mit | my/mine | |||
også | also | |||
under | under/beneath/below/during, below/underneath | |||
have | have | |||
dig | you | |||
anden | other | |||
hende | her | |||
mine | my | |||
alt | everything | |||
meget | much/very, plenty of | |||
sit | his, her, its, one's | |||
sine | his, her, its, one's | |||
vor | our | |||
mod | against | |||
disse | these | |||
hvis | if | |||
din | your/yours | |||
nogle | some | |||
hos | by/at | |||
blive | be/become | |||
mange | many | |||
ad | by/through | |||
bliver | present tense of "to be/to become" | |||
hendes | her/hers | |||
været | been | |||
thi | for (conj) | |||
jer | you | |||
sådan | such, like this/like that |
@@ -0,0 +1,294 @@ | |||
| From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| A German stop word list. Comments begin with vertical bar. Each stop | |||
| word is at the start of a line. | |||
| The number of forms in this list is reduced significantly by passing it | |||
| through the German stemmer. | |||
aber | but | |||
alle | all | |||
allem | |||
allen | |||
aller | |||
alles | |||
als | than, as | |||
also | so | |||
am | an + dem | |||
an | at | |||
ander | other | |||
andere | |||
anderem | |||
anderen | |||
anderer | |||
anderes | |||
anderm | |||
andern | |||
anderr | |||
anders | |||
auch | also | |||
auf | on | |||
aus | out of | |||
bei | by | |||
bin | am | |||
bis | until | |||
bist | art | |||
da | there | |||
damit | with it | |||
dann | then | |||
der | the | |||
den | |||
des | |||
dem | |||
die | |||
das | |||
daß | that | |||
derselbe | the same | |||
derselben | |||
denselben | |||
desselben | |||
demselben | |||
dieselbe | |||
dieselben | |||
dasselbe | |||
dazu | to that | |||
dein | thy | |||
deine | |||
deinem | |||
deinen | |||
deiner | |||
deines | |||
denn | because | |||
derer | of those | |||
dessen | of him | |||
dich | thee | |||
dir | to thee | |||
du | thou | |||
dies | this | |||
diese | |||
diesem | |||
diesen | |||
dieser | |||
dieses | |||
doch | (several meanings) | |||
dort | (over) there | |||
durch | through | |||
ein | a | |||
eine | |||
einem | |||
einen | |||
einer | |||
eines | |||
einig | some | |||
einige | |||
einigem | |||
einigen | |||
einiger | |||
einiges | |||
einmal | once | |||
er | he | |||
ihn | him | |||
ihm | to him | |||
es | it | |||
etwas | something | |||
euer | your | |||
eure | |||
eurem | |||
euren | |||
eurer | |||
eures | |||
für | for | |||
gegen | towards | |||
gewesen | p.p. of sein | |||
hab | have | |||
habe | have | |||
haben | have | |||
hat | has | |||
hatte | had | |||
hatten | had | |||
hier | here | |||
hin | there | |||
hinter | behind | |||
ich | I | |||
mich | me | |||
mir | to me | |||
ihr | you, to her | |||
ihre | |||
ihrem | |||
ihren | |||
ihrer | |||
ihres | |||
euch | to you | |||
im | in + dem | |||
in | in | |||
indem | while | |||
ins | in + das | |||
ist | is | |||
jede | each, every | |||
jedem | |||
jeden | |||
jeder | |||
jedes | |||
jene | that | |||
jenem | |||
jenen | |||
jener | |||
jenes | |||
jetzt | now | |||
kann | can | |||
kein | no | |||
keine | |||
keinem | |||
keinen | |||
keiner | |||
keines | |||
können | can | |||
könnte | could | |||
machen | do | |||
man | one | |||
manche | some, many a | |||
manchem | |||
manchen | |||
mancher | |||
manches | |||
mein | my | |||
meine | |||
meinem | |||
meinen | |||
meiner | |||
meines | |||
mit | with | |||
muss | must | |||
musste | had to | |||
nach | to(wards) | |||
nicht | not | |||
nichts | nothing | |||
noch | still, yet | |||
nun | now | |||
nur | only | |||
ob | whether | |||
oder | or | |||
ohne | without | |||
sehr | very | |||
sein | his | |||
seine | |||
seinem | |||
seinen | |||
seiner | |||
seines | |||
selbst | self | |||
sich | herself | |||
sie | they, she | |||
ihnen | to them | |||
sind | are | |||
so | so | |||
solche | such | |||
solchem | |||
solchen | |||
solcher | |||
solches | |||
soll | shall | |||
sollte | should | |||
sondern | but | |||
sonst | else | |||
über | over | |||
um | about, around | |||
und | and | |||
uns | us | |||
unse | |||
unsem | |||
unsen | |||
unser | |||
unses | |||
unter | under | |||
viel | much | |||
vom | von + dem | |||
von | from | |||
vor | before | |||
während | while | |||
war | was | |||
waren | were | |||
warst | wast | |||
was | what | |||
weg | away, off | |||
weil | because | |||
weiter | further | |||
welche | which | |||
welchem | |||
welchen | |||
welcher | |||
welches | |||
wenn | when | |||
werde | will | |||
werden | will | |||
wie | how | |||
wieder | again | |||
will | want | |||
wir | we | |||
wird | will | |||
wirst | willst | |||
wo | where | |||
wollen | want | |||
wollte | wanted | |||
würde | would | |||
würden | would | |||
zu | to | |||
zum | zu + dem | |||
zur | zu + der | |||
zwar | indeed | |||
zwischen | between | |||
@@ -0,0 +1,78 @@ | |||
# Lucene Greek Stopwords list | |||
# Note: by default this file is used after GreekLowerCaseFilter, | |||
# so when modifying this file use 'σ' instead of 'ς' | |||
ο | |||
η | |||
το | |||
οι | |||
τα | |||
του | |||
τησ | |||
των | |||
τον | |||
την | |||
και | |||
κι | |||
κ | |||
ειμαι | |||
εισαι | |||
ειναι | |||
ειμαστε | |||
ειστε | |||
στο | |||
στον | |||
στη | |||
στην | |||
μα | |||
αλλα | |||
απο | |||
για | |||
προσ | |||
με | |||
σε | |||
ωσ | |||
παρα | |||
αντι | |||
κατα | |||
μετα | |||
θα | |||
να | |||
δε | |||
δεν | |||
μη | |||
μην | |||
επι | |||
ενω | |||
εαν | |||
αν | |||
τοτε | |||
που | |||
πωσ | |||
ποιοσ | |||
ποια | |||
ποιο | |||
ποιοι | |||
ποιεσ | |||
ποιων | |||
ποιουσ | |||
αυτοσ | |||
αυτη | |||
αυτο | |||
αυτοι | |||
αυτων | |||
αυτουσ | |||
αυτεσ | |||
αυτα | |||
εκεινοσ | |||
εκεινη | |||
εκεινο | |||
εκεινοι | |||
εκεινεσ | |||
εκεινα | |||
εκεινων | |||
εκεινουσ | |||
οπωσ | |||
ομωσ | |||
ισωσ | |||
οσο | |||
οτι |
@@ -0,0 +1,54 @@ | |||
# Licensed to the Apache Software Foundation (ASF) under one or more | |||
# contributor license agreements. See the NOTICE file distributed with | |||
# this work for additional information regarding copyright ownership. | |||
# The ASF licenses this file to You under the Apache License, Version 2.0 | |||
# (the "License"); you may not use this file except in compliance with | |||
# the License. You may obtain a copy of the License at | |||
# | |||
# http://www.apache.org/licenses/LICENSE-2.0 | |||
# | |||
# Unless required by applicable law or agreed to in writing, software | |||
# distributed under the License is distributed on an "AS IS" BASIS, | |||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
# See the License for the specific language governing permissions and | |||
# limitations under the License. | |||
# a couple of test stopwords to test that the words are really being | |||
# configured from this file: | |||
stopworda | |||
stopwordb | |||
# Standard english stop words taken from Lucene's StopAnalyzer | |||
a | |||
an | |||
and | |||
are | |||
as | |||
at | |||
be | |||
but | |||
by | |||
for | |||
if | |||
in | |||
into | |||
is | |||
it | |||
no | |||
not | |||
of | |||
on | |||
or | |||
such | |||
that | |||
the | |||
their | |||
then | |||
there | |||
these | |||
they | |||
this | |||
to | |||
was | |||
will | |||
with |
@@ -0,0 +1,356 @@ | |||
| From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| A Spanish stop word list. Comments begin with vertical bar. Each stop | |||
| word is at the start of a line. | |||
| The following is a ranked list (commonest to rarest) of stopwords | |||
| deriving from a large sample of text. | |||
| Extra words have been added at the end. | |||
de | from, of | |||
la | the, her | |||
que | who, that | |||
el | the | |||
en | in | |||
y | and | |||
a | to | |||
los | the, them | |||
del | de + el | |||
se | himself, from him etc | |||
las | the, them | |||
por | for, by, etc | |||
un | a | |||
para | for | |||
con | with | |||
no | no | |||
una | a | |||
su | his, her | |||
al | a + el | |||
| es from SER | |||
lo | him | |||
como | how | |||
más | more | |||
pero | but | |||
sus | su plural | |||
le | to him, her | |||
ya | already | |||
o | or | |||
| fue from SER | |||
este | this | |||
| ha from HABER | |||
sí | himself etc | |||
porque | because | |||
esta | this | |||
| son from SER | |||
entre | between | |||
| está from ESTAR | |||
cuando | when | |||
muy | very | |||
sin | without | |||
sobre | on | |||
| ser from SER | |||
| tiene from TENER | |||
también | also | |||
me | me | |||
hasta | until | |||
hay | there is/are | |||
donde | where | |||
| han from HABER | |||
quien | whom, that | |||
| están from ESTAR | |||
| estado from ESTAR | |||
desde | from | |||
todo | all | |||
nos | us | |||
durante | during | |||
| estados from ESTAR | |||
todos | all | |||
uno | a | |||
les | to them | |||
ni | nor | |||
contra | against | |||
otros | other | |||
| fueron from SER | |||
ese | that | |||
eso | that | |||
| había from HABER | |||
ante | before | |||
ellos | they | |||
e | and (variant of y) | |||
esto | this | |||
mí | me | |||
antes | before | |||
algunos | some | |||
qué | what? | |||
unos | a | |||
yo | I | |||
otro | other | |||
otras | other | |||
otra | other | |||
él | he | |||
tanto | so much, many | |||
esa | that | |||
estos | these | |||
mucho | much, many | |||
quienes | who | |||
nada | nothing | |||
muchos | many | |||
cual | who | |||
| sea from SER | |||
poco | few | |||
ella | she | |||
estar | to be | |||
| haber from HABER | |||
estas | these | |||
| estaba from ESTAR | |||
| estamos from ESTAR | |||
algunas | some | |||
algo | something | |||
nosotros | we | |||
| other forms | |||
mi | my | |||
mis | mi plural | |||
tú | thou | |||
te | thee | |||
ti | thee | |||
tu | thy | |||
tus | tu plural | |||
ellas | they | |||
nosotras | we | |||
vosotros | you | |||
vosotras | you | |||
os | you | |||
mío | mine | |||
mía | | |||
míos | | |||
mías | | |||
tuyo | thine | |||
tuya | | |||
tuyos | | |||
tuyas | | |||
suyo | his, hers, theirs | |||
suya | | |||
suyos | | |||
suyas | | |||
nuestro | ours | |||
nuestra | | |||
nuestros | | |||
nuestras | | |||
vuestro | yours | |||
vuestra | | |||
vuestros | | |||
vuestras | | |||
esos | those | |||
esas | those | |||
| forms of estar, to be (not including the infinitive): | |||
estoy | |||
estás | |||
está | |||
estamos | |||
estáis | |||
están | |||
esté | |||
estés | |||
estemos | |||
estéis | |||
estén | |||
estaré | |||
estarás | |||
estará | |||
estaremos | |||
estaréis | |||
estarán | |||
estaría | |||
estarías | |||
estaríamos | |||
estaríais | |||
estarían | |||
estaba | |||
estabas | |||
estábamos | |||
estabais | |||
estaban | |||
estuve | |||
estuviste | |||
estuvo | |||
estuvimos | |||
estuvisteis | |||
estuvieron | |||
estuviera | |||
estuvieras | |||
estuviéramos | |||
estuvierais | |||
estuvieran | |||
estuviese | |||
estuvieses | |||
estuviésemos | |||
estuvieseis | |||
estuviesen | |||
estando | |||
estado | |||
estada | |||
estados | |||
estadas | |||
estad | |||
| forms of haber, to have (not including the infinitive): | |||
he | |||
has | |||
ha | |||
hemos | |||
habéis | |||
han | |||
haya | |||
hayas | |||
hayamos | |||
hayáis | |||
hayan | |||
habré | |||
habrás | |||
habrá | |||
habremos | |||
habréis | |||
habrán | |||
habría | |||
habrías | |||
habríamos | |||
habríais | |||
habrían | |||
había | |||
habías | |||
habíamos | |||
habíais | |||
habían | |||
hube | |||
hubiste | |||
hubo | |||
hubimos | |||
hubisteis | |||
hubieron | |||
hubiera | |||
hubieras | |||
hubiéramos | |||
hubierais | |||
hubieran | |||
hubiese | |||
hubieses | |||
hubiésemos | |||
hubieseis | |||
hubiesen | |||
habiendo | |||
habido | |||
habida | |||
habidos | |||
habidas | |||
| forms of ser, to be (not including the infinitive): | |||
soy | |||
eres | |||
es | |||
somos | |||
sois | |||
son | |||
sea | |||
seas | |||
seamos | |||
seáis | |||
sean | |||
seré | |||
serás | |||
será | |||
seremos | |||
seréis | |||
serán | |||
sería | |||
serías | |||
seríamos | |||
seríais | |||
serían | |||
era | |||
eras | |||
éramos | |||
erais | |||
eran | |||
fui | |||
fuiste | |||
fue | |||
fuimos | |||
fuisteis | |||
fueron | |||
fuera | |||
fueras | |||
fuéramos | |||
fuerais | |||
fueran | |||
fuese | |||
fueses | |||
fuésemos | |||
fueseis | |||
fuesen | |||
siendo | |||
sido | |||
| sed also means 'thirst' | |||
| forms of tener, to have (not including the infinitive): | |||
tengo | |||
tienes | |||
tiene | |||
tenemos | |||
tenéis | |||
tienen | |||
tenga | |||
tengas | |||
tengamos | |||
tengáis | |||
tengan | |||
tendré | |||
tendrás | |||
tendrá | |||
tendremos | |||
tendréis | |||
tendrán | |||
tendría | |||
tendrías | |||
tendríamos | |||
tendríais | |||
tendrían | |||
tenía | |||
tenías | |||
teníamos | |||
teníais | |||
tenían | |||
tuve | |||
tuviste | |||
tuvo | |||
tuvimos | |||
tuvisteis | |||
tuvieron | |||
tuviera | |||
tuvieras | |||
tuviéramos | |||
tuvierais | |||
tuvieran | |||
tuviese | |||
tuvieses | |||
tuviésemos | |||
tuvieseis | |||
tuviesen | |||
teniendo | |||
tenido | |||
tenida | |||
tenidos | |||
tenidas | |||
tened | |||
@@ -0,0 +1,99 @@ | |||
# example set of basque stopwords | |||
al | |||
anitz | |||
arabera | |||
asko | |||
baina | |||
bat | |||
batean | |||
batek | |||
bati | |||
batzuei | |||
batzuek | |||
batzuetan | |||
batzuk | |||
bera | |||
beraiek | |||
berau | |||
berauek | |||
bere | |||
berori | |||
beroriek | |||
beste | |||
bezala | |||
da | |||
dago | |||
dira | |||
ditu | |||
du | |||
dute | |||
edo | |||
egin | |||
ere | |||
eta | |||
eurak | |||
ez | |||
gainera | |||
gu | |||
gutxi | |||
guzti | |||
haiei | |||
haiek | |||
haietan | |||
hainbeste | |||
hala | |||
han | |||
handik | |||
hango | |||
hara | |||
hari | |||
hark | |||
hartan | |||
hau | |||
hauei | |||
hauek | |||
hauetan | |||
hemen | |||
hemendik | |||
hemengo | |||
hi | |||
hona | |||
honek | |||
honela | |||
honetan | |||
honi | |||
hor | |||
hori | |||
horiei | |||
horiek | |||
horietan | |||
horko | |||
horra | |||
horrek | |||
horrela | |||
horretan | |||
horri | |||
hortik | |||
hura | |||
izan | |||
ni | |||
noiz | |||
nola | |||
non | |||
nondik | |||
nongo | |||
nor | |||
nora | |||
ze | |||
zein | |||
zen | |||
zenbait | |||
zenbat | |||
zer | |||
zergatik | |||
ziren | |||
zituen | |||
zu | |||
zuek | |||
zuen | |||
zuten |
@@ -0,0 +1,313 @@ | |||
# This file was created by Jacques Savoy and is distributed under the BSD license. | |||
# See http://members.unine.ch/jacques.savoy/clef/index.html. | |||
# Also see http://www.opensource.org/licenses/bsd-license.html | |||
# Note: by default this file is used after normalization, so when adding entries | |||
# to this file, use the arabic 'ي' instead of 'ی' | |||
انان | |||
نداشته | |||
سراسر | |||
خياه | |||
ايشان | |||
وي | |||
تاكنون | |||
بيشتري | |||
دوم | |||
پس | |||
ناشي | |||
وگو | |||
يا | |||
داشتند | |||
سپس | |||
هنگام | |||
هرگز | |||
پنج | |||
نشان | |||
امسال | |||
ديگر | |||
گروهي | |||
شدند | |||
چطور | |||
ده | |||
و | |||
دو | |||
نخستين | |||
ولي | |||
چرا | |||
چه | |||
وسط | |||
ه | |||
كدام | |||
قابل | |||
يك | |||
رفت | |||
هفت | |||
همچنين | |||
در | |||
هزار | |||
بله | |||
بلي | |||
شايد | |||
اما | |||
شناسي | |||
گرفته | |||
دهد | |||
داشته | |||
دانست | |||
داشتن | |||
خواهيم | |||
ميليارد | |||
وقتيكه | |||
امد | |||
خواهد | |||
جز | |||
اورده | |||
شده | |||
بلكه | |||
خدمات | |||
شدن | |||
برخي | |||
نبود | |||
بسياري | |||
جلوگيري | |||
حق | |||
كردند | |||
نوعي | |||
بعري | |||
نكرده | |||
نظير | |||
نبايد | |||
بوده | |||
بودن | |||
داد | |||
اورد | |||
هست | |||
جايي | |||
شود | |||
دنبال | |||
داده | |||
بايد | |||
سابق | |||
هيچ | |||
همان | |||
انجا | |||
كمتر | |||
كجاست | |||
گردد | |||
كسي | |||
تر | |||
مردم | |||
تان | |||
دادن | |||
بودند | |||
سري | |||
جدا | |||
ندارند | |||
مگر | |||
يكديگر | |||
دارد | |||
دهند | |||
بنابراين | |||
هنگامي | |||
سمت | |||
جا | |||
انچه | |||
خود | |||
دادند | |||
زياد | |||
دارند | |||
اثر | |||
بدون | |||
بهترين | |||
بيشتر | |||
البته | |||
به | |||
براساس | |||
بيرون | |||
كرد | |||
بعضي | |||
گرفت | |||
توي | |||
اي | |||
ميليون | |||
او | |||
جريان | |||
تول | |||
بر | |||
مانند | |||
برابر | |||
باشيم | |||
مدتي | |||
گويند | |||
اكنون | |||
تا | |||
تنها | |||
جديد | |||
چند | |||
بي | |||
نشده | |||
كردن | |||
كردم | |||
گويد | |||
كرده | |||
كنيم | |||
نمي | |||
نزد | |||
روي | |||
قصد | |||
فقط | |||
بالاي | |||
ديگران | |||
اين | |||
ديروز | |||
توسط | |||
سوم | |||
ايم | |||
دانند | |||
سوي | |||
استفاده | |||
شما | |||
كنار | |||
داريم | |||
ساخته | |||
طور | |||
امده | |||
رفته | |||
نخست | |||
بيست | |||
نزديك | |||
طي | |||
كنيد | |||
از | |||
انها | |||
تمامي | |||
داشت | |||
يكي | |||
طريق | |||
اش | |||
چيست | |||
روب | |||
نمايد | |||
گفت | |||
چندين | |||
چيزي | |||
تواند | |||
ام | |||
ايا | |||
با | |||
ان | |||
ايد | |||
ترين | |||
اينكه | |||
ديگري | |||
راه | |||
هايي | |||
بروز | |||
همچنان | |||
پاعين | |||
كس | |||
حدود | |||
مختلف | |||
مقابل | |||
چيز | |||
گيرد | |||
ندارد | |||
ضد | |||
همچون | |||
سازي | |||
شان | |||
مورد | |||
باره | |||
مرسي | |||
خويش | |||
برخوردار | |||
چون | |||
خارج | |||
شش | |||
هنوز | |||
تحت | |||
ضمن | |||
هستيم | |||
گفته | |||
فكر | |||
بسيار | |||
پيش | |||
براي | |||
روزهاي | |||
انكه | |||
نخواهد | |||
بالا | |||
كل | |||
وقتي | |||
كي | |||
چنين | |||
كه | |||
گيري | |||
نيست | |||
است | |||
كجا | |||
كند | |||
نيز | |||
يابد | |||
بندي | |||
حتي | |||
توانند | |||
عقب | |||
خواست | |||
كنند | |||
بين | |||
تمام | |||
همه | |||
ما | |||
باشند | |||
مثل | |||
شد | |||
اري | |||
باشد | |||
اره | |||
طبق | |||
بعد | |||
اگر | |||
صورت | |||
غير | |||
جاي | |||
بيش | |||
ريزي | |||
اند | |||
زيرا | |||
چگونه | |||
بار | |||
لطفا | |||
مي | |||
درباره | |||
من | |||
ديده | |||
همين | |||
گذاري | |||
برداري | |||
علت | |||
گذاشته | |||
هم | |||
فوق | |||
نه | |||
ها | |||
شوند | |||
اباد | |||
همواره | |||
هر | |||
اول | |||
خواهند | |||
چهار | |||
نام | |||
امروز | |||
مان | |||
هاي | |||
قبل | |||
كنم | |||
سعي | |||
تازه | |||
را | |||
هستند | |||
زير | |||
جلوي | |||
عنوان | |||
بود |
@@ -0,0 +1,97 @@ | |||
| From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| forms of BE | |||
olla | |||
olen | |||
olet | |||
on | |||
olemme | |||
olette | |||
ovat | |||
ole | negative form | |||
oli | |||
olisi | |||
olisit | |||
olisin | |||
olisimme | |||
olisitte | |||
olisivat | |||
olit | |||
olin | |||
olimme | |||
olitte | |||
olivat | |||
ollut | |||
olleet | |||
en | negation | |||
et | |||
ei | |||
emme | |||
ette | |||
eivät | |||
|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans | |||
minä minun minut minua minussa minusta minuun minulla minulta minulle | I | |||
sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you | |||
hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she | |||
me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we | |||
te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you | |||
he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they | |||
tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this | |||
tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that | |||
se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it | |||
nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these | |||
nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those | |||
ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they | |||
kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who | |||
ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) | |||
mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what | |||
mitkä | (pl) | |||
joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which | |||
jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) | |||
| conjunctions | |||
että | that | |||
ja | and | |||
jos | if | |||
koska | because | |||
kuin | than | |||
mutta | but | |||
niin | so | |||
sekä | and | |||
sillä | for | |||
tai | or | |||
vaan | but | |||
vai | or | |||
vaikka | although | |||
| prepositions | |||
kanssa | with | |||
mukaan | according to | |||
noin | about | |||
poikki | across | |||
yli | over, across | |||
| other | |||
kun | when | |||
niin | so | |||
nyt | now | |||
itse | self | |||
@@ -0,0 +1,186 @@ | |||
| From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| A French stop word list. Comments begin with vertical bar. Each stop | |||
| word is at the start of a line. | |||
au | a + le | |||
aux | a + les | |||
avec | with | |||
ce | this | |||
ces | these | |||
dans | in | |||
de | of | |||
des | de + les | |||
du | de + le | |||
elle | she | |||
en | `of them' etc | |||
et | and | |||
eux | them | |||
il | he | |||
je | I | |||
la | the | |||
le | the | |||
leur | their | |||
lui | him | |||
ma | my (fem) | |||
mais | but | |||
me | me | |||
même | same; as in moi-même (myself) etc | |||
mes | my (pl) | |||
moi | me | |||
mon | my (masc) | |||
ne | not | |||
nos | our (pl) | |||
notre | our | |||
nous | we | |||
on | one | |||
ou | or | |||
par | by | |||
pas | not | |||
pour | for | |||
qu | que before vowel | |||
que | that | |||
qui | who | |||
sa | his, her (fem) | |||
se | oneself | |||
ses | his (pl) | |||
son | his, her (masc) | |||
sur | on | |||
ta | thy (fem) | |||
te | thee | |||
tes | thy (pl) | |||
toi | thee | |||
ton | thy (masc) | |||
tu | thou | |||
un | a | |||
une | a | |||
vos | your (pl) | |||
votre | your | |||
vous | you | |||
| single letter forms | |||
c | c' | |||
d | d' | |||
j | j' | |||
l | l' | |||
à | to, at | |||
m | m' | |||
n | n' | |||
s | s' | |||
t | t' | |||
y | there | |||
| forms of être (not including the infinitive): | |||
été | |||
étée | |||
étées | |||
étés | |||
étant | |||
suis | |||
es | |||
est | |||
sommes | |||
êtes | |||
sont | |||
serai | |||
seras | |||
sera | |||
serons | |||
serez | |||
seront | |||
serais | |||
serait | |||
serions | |||
seriez | |||
seraient | |||
étais | |||
était | |||
étions | |||
étiez | |||
étaient | |||
fus | |||
fut | |||
fûmes | |||
fûtes | |||
furent | |||
sois | |||
soit | |||
soyons | |||
soyez | |||
soient | |||
fusse | |||
fusses | |||
fût | |||
fussions | |||
fussiez | |||
fussent | |||
| forms of avoir (not including the infinitive): | |||
ayant | |||
eu | |||
eue | |||
eues | |||
eus | |||
ai | |||
as | |||
avons | |||
avez | |||
ont | |||
aurai | |||
auras | |||
aura | |||
aurons | |||
aurez | |||
auront | |||
aurais | |||
aurait | |||
aurions | |||
auriez | |||
auraient | |||
avais | |||
avait | |||
avions | |||
aviez | |||
avaient | |||
eut | |||
eûmes | |||
eûtes | |||
eurent | |||
aie | |||
aies | |||
ait | |||
ayons | |||
ayez | |||
aient | |||
eusse | |||
eusses | |||
eût | |||
eussions | |||
eussiez | |||
eussent | |||
| Later additions (from Jean-Christophe Deschamps) | |||
ceci | this | |||
cela | that | |||
celà | that | |||
cet | this | |||
cette | this | |||
ici | here | |||
ils | they | |||
les | the (pl) | |||
leurs | their (pl) | |||
quel | which | |||
quels | which | |||
quelle | which | |||
quelles | which | |||
sans | without | |||
soi | oneself | |||
@@ -0,0 +1,110 @@ | |||
a | |||
ach | |||
ag | |||
agus | |||
an | |||
aon | |||
ar | |||
arna | |||
as | |||
b' | |||
ba | |||
beirt | |||
bhúr | |||
caoga | |||
ceathair | |||
ceathrar | |||
chomh | |||
chtó | |||
chuig | |||
chun | |||
cois | |||
céad | |||
cúig | |||
cúigear | |||
d' | |||
daichead | |||
dar | |||
de | |||
deich | |||
deichniúr | |||
den | |||
dhá | |||
do | |||
don | |||
dtí | |||
dá | |||
dár | |||
dó | |||
faoi | |||
faoin | |||
faoina | |||
faoinár | |||
fara | |||
fiche | |||
gach | |||
gan | |||
go | |||
gur | |||
haon | |||
hocht | |||
i | |||
iad | |||
idir | |||
in | |||
ina | |||
ins | |||
inár | |||
is | |||
le | |||
leis | |||
lena | |||
lenár | |||
m' | |||
mar | |||
mo | |||
mé | |||
na | |||
nach | |||
naoi | |||
naonúr | |||
ná | |||
ní | |||
níor | |||
nó | |||
nócha | |||
ocht | |||
ochtar | |||
os | |||
roimh | |||
sa | |||
seacht | |||
seachtar | |||
seachtó | |||
seasca | |||
seisear | |||
siad | |||
sibh | |||
sinn | |||
sna | |||
sé | |||
sí | |||
tar | |||
thar | |||
thú | |||
triúr | |||
trí | |||
trína | |||
trínár | |||
tríocha | |||
tú | |||
um | |||
ár | |||
é | |||
éis | |||
í | |||
ó | |||
ón | |||
óna | |||
ónár |
@@ -0,0 +1,161 @@ | |||
# Galician stopwords | |||
a | |||
aínda | |||
alí | |||
aquel | |||
aquela | |||
aquelas | |||
aqueles | |||
aquilo | |||
aquí | |||
ao | |||
aos | |||
as | |||
así | |||
á | |||
ben | |||
cando | |||
che | |||
co | |||
coa | |||
comigo | |||
con | |||
connosco | |||
contigo | |||
convosco | |||
coas | |||
cos | |||
cun | |||
cuns | |||
cunha | |||
cunhas | |||
da | |||
dalgunha | |||
dalgunhas | |||
dalgún | |||
dalgúns | |||
das | |||
de | |||
del | |||
dela | |||
delas | |||
deles | |||
desde | |||
deste | |||
do | |||
dos | |||
dun | |||
duns | |||
dunha | |||
dunhas | |||
e | |||
el | |||
ela | |||
elas | |||
eles | |||
en | |||
era | |||
eran | |||
esa | |||
esas | |||
ese | |||
eses | |||
esta | |||
estar | |||
estaba | |||
está | |||
están | |||
este | |||
estes | |||
estiven | |||
estou | |||
eu | |||
é | |||
facer | |||
foi | |||
foron | |||
fun | |||
había | |||
hai | |||
iso | |||
isto | |||
la | |||
las | |||
lle | |||
lles | |||
lo | |||
los | |||
mais | |||
me | |||
meu | |||
meus | |||
min | |||
miña | |||
miñas | |||
moi | |||
na | |||
nas | |||
neste | |||
nin | |||
no | |||
non | |||
nos | |||
nosa | |||
nosas | |||
noso | |||
nosos | |||
nós | |||
nun | |||
nunha | |||
nuns | |||
nunhas | |||
o | |||
os | |||
ou | |||
ó | |||
ós | |||
para | |||
pero | |||
pode | |||
pois | |||
pola | |||
polas | |||
polo | |||
polos | |||
por | |||
que | |||
se | |||
senón | |||
ser | |||
seu | |||
seus | |||
sexa | |||
sido | |||
sobre | |||
súa | |||
súas | |||
tamén | |||
tan | |||
te | |||
ten | |||
teñen | |||
teño | |||
ter | |||
teu | |||
teus | |||
ti | |||
tido | |||
tiña | |||
tiven | |||
túa | |||
túas | |||
un | |||
unha | |||
unhas | |||
uns | |||
vos | |||
vosa | |||
vosas | |||
voso | |||
vosos | |||
vós |
@@ -0,0 +1,235 @@ | |||
# Also see http://www.opensource.org/licenses/bsd-license.html | |||
# See http://members.unine.ch/jacques.savoy/clef/index.html. | |||
# This file was created by Jacques Savoy and is distributed under the BSD license. | |||
# Note: by default this file also contains forms normalized by HindiNormalizer | |||
# for spelling variation (see section below), such that it can be used whether or | |||
# not you enable that feature. When adding additional entries to this list, | |||
# please add the normalized form as well. | |||
अंदर | |||
अत | |||
अपना | |||
अपनी | |||
अपने | |||
अभी | |||
आदि | |||
आप | |||
इत्यादि | |||
इन | |||
इनका | |||
इन्हीं | |||
इन्हें | |||
इन्हों | |||
इस | |||
इसका | |||
इसकी | |||
इसके | |||
इसमें | |||
इसी | |||
इसे | |||
उन | |||
उनका | |||
उनकी | |||
उनके | |||
उनको | |||
उन्हीं | |||
उन्हें | |||
उन्हों | |||
उस | |||
उसके | |||
उसी | |||
उसे | |||
एक | |||
एवं | |||
एस | |||
ऐसे | |||
और | |||
कई | |||
कर | |||
करता | |||
करते | |||
करना | |||
करने | |||
करें | |||
कहते | |||
कहा | |||
का | |||
काफ़ी | |||
कि | |||
कितना | |||
किन्हें | |||
किन्हों | |||
किया | |||
किर | |||
किस | |||
किसी | |||
किसे | |||
की | |||
कुछ | |||
कुल | |||
के | |||
को | |||
कोई | |||
कौन | |||
कौनसा | |||
गया | |||
घर | |||
जब | |||
जहाँ | |||
जा | |||
जितना | |||
जिन | |||
जिन्हें | |||
जिन्हों | |||
जिस | |||
जिसे | |||
जीधर | |||
जैसा | |||
जैसे | |||
जो | |||
तक | |||
तब | |||
तरह | |||
तिन | |||
तिन्हें | |||
तिन्हों | |||
तिस | |||
तिसे | |||
तो | |||
था | |||
थी | |||
थे | |||
दबारा | |||
दिया | |||
दुसरा | |||
दूसरे | |||
दो | |||
द्वारा | |||
न | |||
नहीं | |||
ना | |||
निहायत | |||
नीचे | |||
ने | |||
पर | |||
पर | |||
पहले | |||
पूरा | |||
पे | |||
फिर | |||
बनी | |||
बही | |||
बहुत | |||
बाद | |||
बाला | |||
बिलकुल | |||
भी | |||
भीतर | |||
मगर | |||
मानो | |||
मे | |||
में | |||
यदि | |||
यह | |||
यहाँ | |||
यही | |||
या | |||
यिह | |||
ये | |||
रखें | |||
रहा | |||
रहे | |||
ऱ्वासा | |||
लिए | |||
लिये | |||
लेकिन | |||
व | |||
वर्ग | |||
वह | |||
वह | |||
वहाँ | |||
वहीं | |||
वाले | |||
वुह | |||
वे | |||
वग़ैरह | |||
संग | |||
सकता | |||
सकते | |||
सबसे | |||
सभी | |||
साथ | |||
साबुत | |||
साभ | |||
सारा | |||
से | |||
सो | |||
ही | |||
हुआ | |||
हुई | |||
हुए | |||
है | |||
हैं | |||
हो | |||
होता | |||
होती | |||
होते | |||
होना | |||
होने | |||
# additional normalized forms of the above | |||
अपनि | |||
जेसे | |||
होति | |||
सभि | |||
तिंहों | |||
इंहों | |||
दवारा | |||
इसि | |||
किंहें | |||
थि | |||
उंहों | |||
ओर | |||
जिंहें | |||
वहिं | |||
अभि | |||
बनि | |||
हि | |||
उंहिं | |||
उंहें | |||
हें | |||
वगेरह | |||
एसे | |||
रवासा | |||
कोन | |||
निचे | |||
काफि | |||
उसि | |||
पुरा | |||
भितर | |||
हे | |||
बहि | |||
वहां | |||
कोइ | |||
यहां | |||
जिंहों | |||
तिंहें | |||
किसि | |||
कइ | |||
यहि | |||
इंहिं | |||
जिधर | |||
इंहें | |||
अदि | |||
इतयादि | |||
हुइ | |||
कोनसा | |||
इसकि | |||
दुसरे | |||
जहां | |||
अप | |||
किंहों | |||
उनकि | |||
भि | |||
वरग | |||
हुअ | |||
जेसा | |||
नहिं |
@@ -0,0 +1,211 @@ | |||
| From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| Hungarian stop word list | |||
| prepared by Anna Tordai | |||
a | |||
ahogy | |||
ahol | |||
aki | |||
akik | |||
akkor | |||
alatt | |||
által | |||
általában | |||
amely | |||
amelyek | |||
amelyekben | |||
amelyeket | |||
amelyet | |||
amelynek | |||
ami | |||
amit | |||
amolyan | |||
amíg | |||
amikor | |||
át | |||
abban | |||
ahhoz | |||
annak | |||
arra | |||
arról | |||
az | |||
azok | |||
azon | |||
azt | |||
azzal | |||
azért | |||
aztán | |||
azután | |||
azonban | |||
bár | |||
be | |||
belül | |||
benne | |||
cikk | |||
cikkek | |||
cikkeket | |||
csak | |||
de | |||
e | |||
eddig | |||
egész | |||
egy | |||
egyes | |||
egyetlen | |||
egyéb | |||
egyik | |||
egyre | |||
ekkor | |||
el | |||
elég | |||
ellen | |||
elő | |||
először | |||
előtt | |||
első | |||
én | |||
éppen | |||
ebben | |||
ehhez | |||
emilyen | |||
ennek | |||
erre | |||
ez | |||
ezt | |||
ezek | |||
ezen | |||
ezzel | |||
ezért | |||
és | |||
fel | |||
felé | |||
hanem | |||
hiszen | |||
hogy | |||
hogyan | |||
igen | |||
így | |||
illetve | |||
ill. | |||
ill | |||
ilyen | |||
ilyenkor | |||
ison | |||
ismét | |||
itt | |||
jó | |||
jól | |||
jobban | |||
kell | |||
kellett | |||
keresztül | |||
keressünk | |||
ki | |||
kívül | |||
között | |||
közül | |||
legalább | |||
lehet | |||
lehetett | |||
legyen | |||
lenne | |||
lenni | |||
lesz | |||
lett | |||
maga | |||
magát | |||
majd | |||
majd | |||
már | |||
más | |||
másik | |||
meg | |||
még | |||
mellett | |||
mert | |||
mely | |||
melyek | |||
mi | |||
mit | |||
míg | |||
miért | |||
milyen | |||
mikor | |||
minden | |||
mindent | |||
mindenki | |||
mindig | |||
mint | |||
mintha | |||
mivel | |||
most | |||
nagy | |||
nagyobb | |||
nagyon | |||
ne | |||
néha | |||
nekem | |||
neki | |||
nem | |||
néhány | |||
nélkül | |||
nincs | |||
olyan | |||
ott | |||
össze | |||
ő | |||
ők | |||
őket | |||
pedig | |||
persze | |||
rá | |||
s | |||
saját | |||
sem | |||
semmi | |||
sok | |||
sokat | |||
sokkal | |||
számára | |||
szemben | |||
szerint | |||
szinte | |||
talán | |||
tehát | |||
teljes | |||
tovább | |||
továbbá | |||
több | |||
úgy | |||
ugyanis | |||
új | |||
újabb | |||
újra | |||
után | |||
utána | |||
utolsó | |||
vagy | |||
vagyis | |||
valaki | |||
valami | |||
valamint | |||
való | |||
vagyok | |||
van | |||
vannak | |||
volt | |||
voltam | |||
voltak | |||
voltunk | |||
vissza | |||
vele | |||
viszont | |||
volna |
@@ -0,0 +1,46 @@ | |||
# example set of Armenian stopwords. | |||
այդ | |||
այլ | |||
այն | |||
այս | |||
դու | |||
դուք | |||
եմ | |||
են | |||
ենք | |||
ես | |||
եք | |||
է | |||
էի | |||
էին | |||
էինք | |||
էիր | |||
էիք | |||
էր | |||
ըստ | |||
թ | |||
ի | |||
ին | |||
իսկ | |||
իր | |||
կամ | |||
համար | |||
հետ | |||
հետո | |||
մենք | |||
մեջ | |||
մի | |||
ն | |||
նա | |||
նաև | |||
նրա | |||
նրանք | |||
որ | |||
որը | |||
որոնք | |||
որպես | |||
ու | |||
ում | |||
պիտի | |||
վրա | |||
և |
@@ -0,0 +1,359 @@ | |||
# from appendix D of: A Study of Stemming Effects on Information | |||
# Retrieval in Bahasa Indonesia | |||
ada | |||
adanya | |||
adalah | |||
adapun | |||
agak | |||
agaknya | |||
agar | |||
akan | |||
akankah | |||
akhirnya | |||
aku | |||
akulah | |||
amat | |||
amatlah | |||
anda | |||
andalah | |||
antar | |||
diantaranya | |||
antara | |||
antaranya | |||
diantara | |||
apa | |||
apaan | |||
mengapa | |||
apabila | |||
apakah | |||
apalagi | |||
apatah | |||
atau | |||
ataukah | |||
ataupun | |||
bagai | |||
bagaikan | |||
sebagai | |||
sebagainya | |||
bagaimana | |||
bagaimanapun | |||
sebagaimana | |||
bagaimanakah | |||
bagi | |||
bahkan | |||
bahwa | |||
bahwasanya | |||
sebaliknya | |||
banyak | |||
sebanyak | |||
beberapa | |||
seberapa | |||
begini | |||
beginian | |||
beginikah | |||
beginilah | |||
sebegini | |||
begitu | |||
begitukah | |||
begitulah | |||
begitupun | |||
sebegitu | |||
belum | |||
belumlah | |||
sebelum | |||
sebelumnya | |||
sebenarnya | |||
berapa | |||
berapakah | |||
berapalah | |||
berapapun | |||
betulkah | |||
sebetulnya | |||
biasa | |||
biasanya | |||
bila | |||
bilakah | |||
bisa | |||
bisakah | |||
sebisanya | |||
boleh | |||
bolehkah | |||
bolehlah | |||
buat | |||
bukan | |||
bukankah | |||
bukanlah | |||
bukannya | |||
cuma | |||
percuma | |||
dahulu | |||
dalam | |||
dan | |||
dapat | |||
dari | |||
daripada | |||
dekat | |||
demi | |||
demikian | |||
demikianlah | |||
sedemikian | |||
dengan | |||
depan | |||
di | |||
dia | |||
dialah | |||
dini | |||
diri | |||
dirinya | |||
terdiri | |||
dong | |||
dulu | |||
enggak | |||
enggaknya | |||
entah | |||
entahlah | |||
terhadap | |||
terhadapnya | |||
hal | |||
hampir | |||
hanya | |||
hanyalah | |||
harus | |||
haruslah | |||
harusnya | |||
seharusnya | |||
hendak | |||
hendaklah | |||
hendaknya | |||
hingga | |||
sehingga | |||
ia | |||
ialah | |||
ibarat | |||
ingin | |||
inginkah | |||
inginkan | |||
ini | |||
inikah | |||
inilah | |||
itu | |||
itukah | |||
itulah | |||
jangan | |||
jangankan | |||
janganlah | |||
jika | |||
jikalau | |||
juga | |||
justru | |||
kala | |||
kalau | |||
kalaulah | |||
kalaupun | |||
kalian | |||
kami | |||
kamilah | |||
kamu | |||
kamulah | |||
kan | |||
kapan | |||
kapankah | |||
kapanpun | |||
dikarenakan | |||
karena | |||
karenanya | |||
ke | |||
kecil | |||
kemudian | |||
kenapa | |||
kepada | |||
kepadanya | |||
ketika | |||
seketika | |||
khususnya | |||
kini | |||
kinilah | |||
kiranya | |||
sekiranya | |||
kita | |||
kitalah | |||
kok | |||
lagi | |||
lagian | |||
selagi | |||
lah | |||
lain | |||
lainnya | |||
melainkan | |||
selaku | |||
lalu | |||
melalui | |||
terlalu | |||
lama | |||
lamanya | |||
selama | |||
selama | |||
selamanya | |||
lebih | |||
terlebih | |||
bermacam | |||
macam | |||
semacam | |||
maka | |||
makanya | |||
makin | |||
malah | |||
malahan | |||
mampu | |||
mampukah | |||
mana | |||
manakala | |||
manalagi | |||
masih | |||
masihkah | |||
semasih | |||
masing | |||
mau | |||
maupun | |||
semaunya | |||
memang | |||
mereka | |||
merekalah | |||
meski | |||
meskipun | |||
semula | |||
mungkin | |||
mungkinkah | |||
nah | |||
namun | |||
nanti | |||
nantinya | |||
nyaris | |||
oleh | |||
olehnya | |||
seorang | |||
seseorang | |||
pada | |||
padanya | |||
padahal | |||
paling | |||
sepanjang | |||
pantas | |||
sepantasnya | |||
sepantasnyalah | |||
para | |||
pasti | |||
pastilah | |||
per | |||
pernah | |||
pula | |||
pun | |||
merupakan | |||
rupanya | |||
serupa | |||
saat | |||
saatnya | |||
sesaat | |||
saja | |||
sajalah | |||
saling | |||
bersama | |||
sama | |||
sesama | |||
sambil | |||
sampai | |||
sana | |||
sangat | |||
sangatlah | |||
saya | |||
sayalah | |||
se | |||
sebab | |||
sebabnya | |||
sebuah | |||
tersebut | |||
tersebutlah | |||
sedang | |||
sedangkan | |||
sedikit | |||
sedikitnya | |||
segala | |||
segalanya | |||
segera | |||
sesegera | |||
sejak | |||
sejenak | |||
sekali | |||
sekalian | |||
sekalipun | |||
sesekali | |||
sekaligus | |||
sekarang | |||
sekarang | |||
sekitar | |||
sekitarnya | |||
sela | |||
selain | |||
selalu | |||
seluruh | |||
seluruhnya | |||
semakin | |||
sementara | |||
sempat | |||
semua | |||
semuanya | |||
sendiri | |||
sendirinya | |||
seolah | |||
seperti | |||
sepertinya | |||
sering | |||
seringnya | |||
serta | |||
siapa | |||
siapakah | |||
siapapun | |||
disini | |||
disinilah | |||
sini | |||
sinilah | |||
sesuatu | |||
sesuatunya | |||
suatu | |||
sesudah | |||
sesudahnya | |||
sudah | |||
sudahkah | |||
sudahlah | |||
supaya | |||
tadi | |||
tadinya | |||
tak | |||
tanpa | |||
setelah | |||
telah | |||
tentang | |||
tentu | |||
tentulah | |||
tentunya | |||
tertentu | |||
seterusnya | |||
tapi | |||
tetapi | |||
setiap | |||
tiap | |||
setidaknya | |||
tidak | |||
tidakkah | |||
tidaklah | |||
toh | |||
waduh | |||
wah | |||
wahai | |||
sewaktu | |||
walau | |||
walaupun | |||
wong | |||
yaitu | |||
yakni | |||
yang |
@@ -0,0 +1,303 @@ | |||
| From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| An Italian stop word list. Comments begin with vertical bar. Each stop | |||
| word is at the start of a line. | |||
ad | a (to) before vowel | |||
al | a + il | |||
allo | a + lo | |||
ai | a + i | |||
agli | a + gli | |||
all | a + l' | |||
agl | a + gl' | |||
alla | a + la | |||
alle | a + le | |||
con | with | |||
col | con + il | |||
coi | con + i (forms collo, cogli etc are now very rare) | |||
da | from | |||
dal | da + il | |||
dallo | da + lo | |||
dai | da + i | |||
dagli | da + gli | |||
dall | da + l' | |||
dagl | da + gl' | |||
dalla | da + la | |||
dalle | da + le | |||
di | of | |||
del | di + il | |||
dello | di + lo | |||
dei | di + i | |||
degli | di + gli | |||
dell | di + l' | |||
degl | di + gl' | |||
della | di + la | |||
delle | di + le | |||
in | in | |||
nel | in + il | |||
nello | in + lo | |||
nei | in + i | |||
negli | in + gli | |||
nell | in + l' | |||
negl | in + gl' | |||
nella | in + la | |||
nelle | in + le | |||
su | on | |||
sul | su + il | |||
sullo | su + lo | |||
sui | su + i | |||
sugli | su + gli | |||
sull | su + l' | |||
sugl | su + gl' | |||
sulla | su + la | |||
sulle | su + le | |||
per | through, by | |||
tra | among | |||
contro | against | |||
io | I | |||
tu | thou | |||
lui | he | |||
lei | she | |||
noi | we | |||
voi | you | |||
loro | they | |||
mio | my | |||
mia | | |||
miei | | |||
mie | | |||
tuo | | |||
tua | | |||
tuoi | thy | |||
tue | | |||
suo | | |||
sua | | |||
suoi | his, her | |||
sue | | |||
nostro | our | |||
nostra | | |||
nostri | | |||
nostre | | |||
vostro | your | |||
vostra | | |||
vostri | | |||
vostre | | |||
mi | me | |||
ti | thee | |||
ci | us, there | |||
vi | you, there | |||
lo | him, the | |||
la | her, the | |||
li | them | |||
le | them, the | |||
gli | to him, the | |||
ne | from there etc | |||
il | the | |||
un | a | |||
uno | a | |||
una | a | |||
ma | but | |||
ed | and | |||
se | if | |||
perché | why, because | |||
anche | also | |||
come | how | |||
dov | where (as dov') | |||
dove | where | |||
che | who, that | |||
chi | who | |||
cui | whom | |||
non | not | |||
più | more | |||
quale | who, that | |||
quanto | how much | |||
quanti | | |||
quanta | | |||
quante | | |||
quello | that | |||
quelli | | |||
quella | | |||
quelle | | |||
questo | this | |||
questi | | |||
questa | | |||
queste | | |||
si | yes | |||
tutto | all | |||
tutti | all | |||
| single letter forms: | |||
a | at | |||
c | as c' for ce or ci | |||
e | and | |||
i | the | |||
l | as l' | |||
o | or | |||
| forms of avere, to have (not including the infinitive): | |||
ho | |||
hai | |||
ha | |||
abbiamo | |||
avete | |||
hanno | |||
abbia | |||
abbiate | |||
abbiano | |||
avrò | |||
avrai | |||
avrà | |||
avremo | |||
avrete | |||
avranno | |||
avrei | |||
avresti | |||
avrebbe | |||
avremmo | |||
avreste | |||
avrebbero | |||
avevo | |||
avevi | |||
aveva | |||
avevamo | |||
avevate | |||
avevano | |||
ebbi | |||
avesti | |||
ebbe | |||
avemmo | |||
aveste | |||
ebbero | |||
avessi | |||
avesse | |||
avessimo | |||
avessero | |||
avendo | |||
avuto | |||
avuta | |||
avuti | |||
avute | |||
| forms of essere, to be (not including the infinitive): | |||
sono | |||
sei | |||
è | |||
siamo | |||
siete | |||
sia | |||
siate | |||
siano | |||
sarò | |||
sarai | |||
sarà | |||
saremo | |||
sarete | |||
saranno | |||
sarei | |||
saresti | |||
sarebbe | |||
saremmo | |||
sareste | |||
sarebbero | |||
ero | |||
eri | |||
era | |||
eravamo | |||
eravate | |||
erano | |||
fui | |||
fosti | |||
fu | |||
fummo | |||
foste | |||
furono | |||
fossi | |||
fosse | |||
fossimo | |||
fossero | |||
essendo | |||
| forms of fare, to do (not including the infinitive, fa, fat-): | |||
faccio | |||
fai | |||
facciamo | |||
fanno | |||
faccia | |||
facciate | |||
facciano | |||
farò | |||
farai | |||
farà | |||
faremo | |||
farete | |||
faranno | |||
farei | |||
faresti | |||
farebbe | |||
faremmo | |||
fareste | |||
farebbero | |||
facevo | |||
facevi | |||
faceva | |||
facevamo | |||
facevate | |||
facevano | |||
feci | |||
facesti | |||
fece | |||
facemmo | |||
faceste | |||
fecero | |||
facessi | |||
facesse | |||
facessimo | |||
facessero | |||
facendo | |||
| forms of stare, to be (not including the infinitive): | |||
sto | |||
stai | |||
sta | |||
stiamo | |||
stanno | |||
stia | |||
stiate | |||
stiano | |||
starò | |||
starai | |||
starà | |||
staremo | |||
starete | |||
staranno | |||
starei | |||
staresti | |||
starebbe | |||
staremmo | |||
stareste | |||
starebbero | |||
stavo | |||
stavi | |||
stava | |||
stavamo | |||
stavate | |||
stavano | |||
stetti | |||
stesti | |||
stette | |||
stemmo | |||
steste | |||
stettero | |||
stessi | |||
stesse | |||
stessimo | |||
stessero | |||
stando |
@@ -0,0 +1,127 @@ | |||
# | |||
# This file defines a stopword set for Japanese. | |||
# | |||
# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. | |||
# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 | |||
# for frequency lists, etc. that can be useful for making your own set (if desired) | |||
# | |||
# Note that there is an overlap between these stopwords and the terms stopped when used | |||
# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note | |||
# that comments are not allowed on the same line as stopwords. | |||
# | |||
# Also note that stopping is done in a case-insensitive manner. Change your StopFilter | |||
# configuration if you need case-sensitive stopping. Lastly, note that stopping is done | |||
# using the same character width as the entries in this file. Since this StopFilter is | |||
# normally done after a CJKWidthFilter in your chain, you would usually want your romaji | |||
# entries to be in half-width and your kana entries to be in full-width. | |||
# | |||
の | |||
に | |||
は | |||
を | |||
た | |||
が | |||
で | |||
て | |||
と | |||
し | |||
れ | |||
さ | |||
ある | |||
いる | |||
も | |||
する | |||
から | |||
な | |||
こと | |||
として | |||
い | |||
や | |||
れる | |||
など | |||
なっ | |||
ない | |||
この | |||
ため | |||
その | |||
あっ | |||
よう | |||
また | |||
もの | |||
という | |||
あり | |||
まで | |||
られ | |||
なる | |||
へ | |||
か | |||
だ | |||
これ | |||
によって | |||
により | |||
おり | |||
より | |||
による | |||
ず | |||
なり | |||
られる | |||
において | |||
ば | |||
なかっ | |||
なく | |||
しかし | |||
について | |||
せ | |||
だっ | |||
その後 | |||
できる | |||
それ | |||
う | |||
ので | |||
なお | |||
のみ | |||
でき | |||
き | |||
つ | |||
における | |||
および | |||
いう | |||
さらに | |||
でも | |||
ら | |||
たり | |||
その他 | |||
に関する | |||
たち | |||
ます | |||
ん | |||
なら | |||
に対して | |||
特に | |||
せる | |||
及び | |||
これら | |||
とき | |||
では | |||
にて | |||
ほか | |||
ながら | |||
うち | |||
そして | |||
とともに | |||
ただし | |||
かつて | |||
それぞれ | |||
または | |||
お | |||
ほど | |||
ものの | |||
に対する | |||
ほとんど | |||
と共に | |||
といった | |||
です | |||
とも | |||
ところ | |||
ここ | |||
##### End of file |
@@ -0,0 +1,172 @@ | |||
# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins | |||
# the original list of over 800 forms was refined: | |||
# pronouns, adverbs, interjections were removed | |||
# | |||
# prepositions | |||
aiz | |||
ap | |||
ar | |||
apakš | |||
ārpus | |||
augšpus | |||
bez | |||
caur | |||
dēļ | |||
gar | |||
iekš | |||
iz | |||
kopš | |||
labad | |||
lejpus | |||
līdz | |||
no | |||
otrpus | |||
pa | |||
par | |||
pār | |||
pēc | |||
pie | |||
pirms | |||
pret | |||
priekš | |||
starp | |||
šaipus | |||
uz | |||
viņpus | |||
virs | |||
virspus | |||
zem | |||
apakšpus | |||
# Conjunctions | |||
un | |||
bet | |||
jo | |||
ja | |||
ka | |||
lai | |||
tomēr | |||
tikko | |||
turpretī | |||
arī | |||
kaut | |||
gan | |||
tādēļ | |||
tā | |||
ne | |||
tikvien | |||
vien | |||
kā | |||
ir | |||
te | |||
vai | |||
kamēr | |||
# Particles | |||
ar | |||
diezin | |||
droši | |||
diemžēl | |||
nebūt | |||
ik | |||
it | |||
taču | |||
nu | |||
pat | |||
tiklab | |||
iekšpus | |||
nedz | |||
tik | |||
nevis | |||
turpretim | |||
jeb | |||
iekam | |||
iekām | |||
iekāms | |||
kolīdz | |||
līdzko | |||
tiklīdz | |||
jebšu | |||
tālab | |||
tāpēc | |||
nekā | |||
itin | |||
jā | |||
jau | |||
jel | |||
nē | |||
nezin | |||
tad | |||
tikai | |||
vis | |||
tak | |||
iekams | |||
vien | |||
# modal verbs | |||
būt | |||
biju | |||
biji | |||
bija | |||
bijām | |||
bijāt | |||
esmu | |||
esi | |||
esam | |||
esat | |||
būšu | |||
būsi | |||
būs | |||
būsim | |||
būsiet | |||
tikt | |||
tiku | |||
tiki | |||
tika | |||
tikām | |||
tikāt | |||
tieku | |||
tiec | |||
tiek | |||
tiekam | |||
tiekat | |||
tikšu | |||
tiks | |||
tiksim | |||
tiksiet | |||
tapt | |||
tapi | |||
tapāt | |||
topat | |||
tapšu | |||
tapsi | |||
taps | |||
tapsim | |||
tapsiet | |||
kļūt | |||
kļuvu | |||
kļuvi | |||
kļuva | |||
kļuvām | |||
kļuvāt | |||
kļūstu | |||
kļūsti | |||
kļūst | |||
kļūstam | |||
kļūstat | |||
kļūšu | |||
kļūsi | |||
kļūs | |||
kļūsim | |||
kļūsiet | |||
# verbs | |||
varēt | |||
varēju | |||
varējām | |||
varēšu | |||
varēsim | |||
var | |||
varēji | |||
varējāt | |||
varēsi | |||
varēsiet | |||
varat | |||
varēja | |||
varēs |
@@ -0,0 +1,119 @@ | |||
| From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| A Dutch stop word list. Comments begin with vertical bar. Each stop | |||
| word is at the start of a line. | |||
| This is a ranked list (commonest to rarest) of stopwords derived from | |||
| a large sample of Dutch text. | |||
| Dutch stop words frequently exhibit homonym clashes. These are indicated | |||
| clearly below. | |||
de | the | |||
en | and | |||
van | of, from | |||
ik | I, the ego | |||
te | (1) chez, at etc, (2) to, (3) too | |||
dat | that, which | |||
die | that, those, who, which | |||
in | in, inside | |||
een | a, an, one | |||
hij | he | |||
het | the, it | |||
niet | not, nothing, naught | |||
zijn | (1) to be, being, (2) his, one's, its | |||
is | is | |||
was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river | |||
op | on, upon, at, in, up, used up | |||
aan | on, upon, to (as dative) | |||
met | with, by | |||
als | like, such as, when | |||
voor | (1) before, in front of, (2) furrow | |||
had | had, past tense all persons sing. of 'hebben' (have) | |||
er | there | |||
maar | but, only | |||
om | round, about, for etc | |||
hem | him | |||
dan | then | |||
zou | should/would, past tense all persons sing. of 'zullen' | |||
of | or, whether, if | |||
wat | what, something, anything | |||
mijn | possessive and noun 'mine' | |||
men | people, 'one' | |||
dit | this | |||
zo | so, thus, in this way | |||
door | through, by | |||
over | over, across | |||
ze | she, her, they, them | |||
zich | oneself | |||
bij | (1) a bee, (2) by, near, at | |||
ook | also, too | |||
tot | till, until | |||
je | you | |||
mij | me | |||
uit | out of, from | |||
der | Old Dutch form of 'van der' still found in surnames | |||
daar | (1) there, (2) because | |||
haar | (1) her, their, them, (2) hair | |||
naar | (1) unpleasant, unwell etc, (2) towards, (3) as | |||
heb | present first person sing. of 'to have' | |||
hoe | how, why | |||
heeft | present third person sing. of 'to have' | |||
hebben | 'to have' and various parts thereof | |||
deze | this | |||
u | you | |||
want | (1) for, (2) mitten, (3) rigging | |||
nog | yet, still | |||
zal | 'shall', first and third person sing. of verb 'zullen' (will) | |||
me | me | |||
zij | she, they | |||
nu | now | |||
ge | 'thou', still used in Belgium and south Netherlands | |||
geen | none | |||
omdat | because | |||
iets | something, somewhat | |||
worden | to become, grow, get | |||
toch | yet, still | |||
al | all, every, each | |||
waren | (1) 'were', (2) to wander, (3) wares | |||
veel | much, many | |||
meer | (1) more, (2) lake | |||
doen | to do, to make | |||
toen | then, when | |||
moet | noun 'spot/mote' and present form of 'to must' | |||
ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' | |||
zonder | without | |||
kan | noun 'can' and present form of 'to be able' | |||
hun | their, them | |||
dus | so, consequently | |||
alles | all, everything, anything | |||
onder | under, beneath | |||
ja | yes, of course | |||
eens | once, one day | |||
hier | here | |||
wie | who | |||
werd | imperfect third person sing. of 'become' | |||
altijd | always | |||
doch | yet, but etc | |||
wordt | present third person sing. of 'become' | |||
wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans | |||
kunnen | to be able | |||
ons | us/our | |||
zelf | self | |||
tegen | against, towards, at | |||
na | after, near | |||
reeds | already | |||
wil | (1) present tense of 'want', (2) 'will', noun, (3) fender | |||
kon | could; past tense of 'to be able' | |||
niets | nothing | |||
uw | your | |||
iemand | somebody | |||
geweest | been; past participle of 'be' | |||
andere | other |
@@ -0,0 +1,194 @@ | |||
| From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| A Norwegian stop word list. Comments begin with vertical bar. Each stop | |||
| word is at the start of a line. | |||
| This stop word list is for the dominant bokmål dialect. Words unique | |||
| to nynorsk are marked *. | |||
| Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005 | |||
og | and | |||
i | in | |||
jeg | I | |||
det | it/this/that | |||
at | to (w. inf.) | |||
en | a/an | |||
et | a/an | |||
den | it/this/that | |||
til | to | |||
er | is/am/are | |||
som | who/that | |||
på | on | |||
de | they / you(formal) | |||
med | with | |||
han | he | |||
av | of | |||
ikke | not | |||
ikkje | not * | |||
der | there | |||
så | so | |||
var | was/were | |||
meg | me | |||
seg | oneself/himself/herself (reflexive) | |||
men | but | |||
ett | one | |||
har | have | |||
om | about | |||
vi | we | |||
min | my | |||
mitt | my | |||
ha | have | |||
hadde | had | |||
hun | she | |||
nå | now | |||
over | over | |||
da | when/as | |||
ved | by/know | |||
fra | from | |||
du | you | |||
ut | out | |||
sin | your | |||
dem | them | |||
oss | us | |||
opp | up | |||
man | you/one | |||
kan | can | |||
hans | his | |||
hvor | where | |||
eller | or | |||
hva | what | |||
skal | shall/must | |||
selv | self (reflexive) | |||
sjøl | self (reflexive) | |||
her | here | |||
alle | all | |||
vil | will | |||
bli | become | |||
ble | became | |||
blei | became * | |||
blitt | have become | |||
kunne | could | |||
inn | in | |||
når | when | |||
være | be | |||
kom | come | |||
noen | some | |||
noe | some | |||
ville | would | |||
dere | you | |||
som | who/which/that | |||
deres | their/theirs | |||
kun | only/just | |||
ja | yes | |||
etter | after | |||
ned | down | |||
skulle | should | |||
denne | this | |||
for | for/because | |||
deg | you | |||
si | hers/his | |||
sine | hers/his | |||
sitt | hers/his | |||
mot | against | |||
å | to | |||
meget | much | |||
hvorfor | why | |||
dette | this | |||
disse | these/those | |||
uten | without | |||
hvordan | how | |||
ingen | none | |||
din | your | |||
ditt | your | |||
blir | become | |||
samme | same | |||
hvilken | which | |||
hvilke | which (plural) | |||
sånn | such a | |||
inni | inside/within | |||
mellom | between | |||
vår | our | |||
hver | each | |||
hvem | who | |||
vors | us/ours | |||
hvis | whose | |||
både | both | |||
bare | only/just | |||
enn | than | |||
fordi | as/because | |||
før | before | |||
mange | many | |||
også | also | |||
slik | just | |||
vært | been | |||
være | to be | |||
båe | both * | |||
begge | both | |||
siden | since | |||
dykk | your * | |||
dykkar | yours * | |||
dei | they * | |||
deira | them * | |||
deires | theirs * | |||
deim | them * | |||
di | your (fem.) * | |||
då | as/when * | |||
eg | I * | |||
ein | a/an * | |||
eit | a/an * | |||
eitt | a/an * | |||
elles | or * | |||
honom | him * | |||
hjå | at * | |||
ho | she * | |||
hoe | she * | |||
henne | her | |||
hennar | her/hers | |||
hennes | hers | |||
hoss | how * | |||
hossen | how * | |||
ikkje | not * | |||
ingi | noone * | |||
inkje | noone * | |||
korleis | how * | |||
korso | how * | |||
kva | what/which * | |||
kvar | where * | |||
kvarhelst | where * | |||
kven | who/whom * | |||
kvi | why * | |||
kvifor | why * | |||
me | we * | |||
medan | while * | |||
mi | my * | |||
mine | my * | |||
mykje | much * | |||
no | now * | |||
nokon | some (masc./neut.) * | |||
noka | some (fem.) * | |||
nokor | some * | |||
noko | some * | |||
nokre | some * | |||
si | his/hers * | |||
sia | since * | |||
sidan | since * | |||
so | so * | |||
somt | some * | |||
somme | some * | |||
um | about * | |||
upp | up * | |||
vere | be * | |||
vore | was * | |||
verte | become * | |||
vort | become * | |||
varte | became * | |||
vart | became * | |||
@@ -0,0 +1,253 @@ | |||
| From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| A Portuguese stop word list. Comments begin with vertical bar. Each stop | |||
| word is at the start of a line. | |||
| The following is a ranked list (commonest to rarest) of stopwords | |||
| deriving from a large sample of text. | |||
| Extra words have been added at the end. | |||
de | of, from | |||
a | the; to, at; her | |||
o | the; him | |||
que | who, that | |||
e | and | |||
do | de + o | |||
da | de + a | |||
em | in | |||
um | a | |||
para | for | |||
| é from SER | |||
com | with | |||
não | not, no | |||
uma | a | |||
os | the; them | |||
no | em + o | |||
se | himself etc | |||
na | em + a | |||
por | for | |||
mais | more | |||
as | the; them | |||
dos | de + os | |||
como | as, like | |||
mas | but | |||
| foi from SER | |||
ao | a + o | |||
ele | he | |||
das | de + as | |||
| tem from TER | |||
à | a + a | |||
seu | his | |||
sua | her | |||
ou | or | |||
| ser from SER | |||
quando | when | |||
muito | much | |||
| há from HAV | |||
nos | em + os; us | |||
já | already, now | |||
| está from EST | |||
eu | I | |||
também | also | |||
só | only, just | |||
pelo | per + o | |||
pela | per + a | |||
até | up to | |||
isso | that | |||
ela | she | |||
entre | between | |||
| era from SER | |||
depois | after | |||
sem | without | |||
mesmo | same | |||
aos | a + os | |||
| ter from TER | |||
seus | his | |||
quem | whom | |||
nas | em + as | |||
me | me | |||
esse | that | |||
eles | they | |||
| estão from EST | |||
você | you | |||
| tinha from TER | |||
| foram from SER | |||
essa | that | |||
num | em + um | |||
nem | nor | |||
suas | her | |||
meu | my | |||
às | a + as | |||
minha | my | |||
| têm from TER | |||
numa | em + uma | |||
pelos | per + os | |||
elas | they | |||
| havia from HAV | |||
| seja from SER | |||
qual | which | |||
| será from SER | |||
nós | we | |||
| tenho from TER | |||
lhe | to him, her | |||
deles | of them | |||
essas | those | |||
esses | those | |||
pelas | per + as | |||
este | this | |||
| fosse from SER | |||
dele | of him | |||
| other words. There are many contractions such as naquele = em+aquele, | |||
| mo = me+o, but they are rare. | |||
| Indefinite article plural forms are also rare. | |||
tu | thou | |||
te | thee | |||
vocês | you (plural) | |||
vos | you | |||
lhes | to them | |||
meus | my | |||
minhas | |||
teu | thy | |||
tua | |||
teus | |||
tuas | |||
nosso | our | |||
nossa | |||
nossos | |||
nossas | |||
dela | of her | |||
delas | of them | |||
esta | this | |||
estes | these | |||
estas | these | |||
aquele | that | |||
aquela | that | |||
aqueles | those | |||
aquelas | those | |||
isto | this | |||
aquilo | that | |||
| forms of estar, to be (not including the infinitive): | |||
estou | |||
está | |||
estamos | |||
estão | |||
estive | |||
esteve | |||
estivemos | |||
estiveram | |||
estava | |||
estávamos | |||
estavam | |||
estivera | |||
estivéramos | |||
esteja | |||
estejamos | |||
estejam | |||
estivesse | |||
estivéssemos | |||
estivessem | |||
estiver | |||
estivermos | |||
estiverem | |||
| forms of haver, to have (not including the infinitive): | |||
hei | |||
há | |||
havemos | |||
hão | |||
houve | |||
houvemos | |||
houveram | |||
houvera | |||
houvéramos | |||
haja | |||
hajamos | |||
hajam | |||
houvesse | |||
houvéssemos | |||
houvessem | |||
houver | |||
houvermos | |||
houverem | |||
houverei | |||
houverá | |||
houveremos | |||
houverão | |||
houveria | |||
houveríamos | |||
houveriam | |||
| forms of ser, to be (not including the infinitive): | |||
sou | |||
somos | |||
são | |||
era | |||
éramos | |||
eram | |||
fui | |||
foi | |||
fomos | |||
foram | |||
fora | |||
fôramos | |||
seja | |||
sejamos | |||
sejam | |||
fosse | |||
fôssemos | |||
fossem | |||
for | |||
formos | |||
forem | |||
serei | |||
será | |||
seremos | |||
serão | |||
seria | |||
seríamos | |||
seriam | |||
| forms of ter, to have (not including the infinitive): | |||
tenho | |||
tem | |||
temos | |||
têm | |||
tinha | |||
tínhamos | |||
tinham | |||
tive | |||
teve | |||
tivemos | |||
tiveram | |||
tivera | |||
tivéramos | |||
tenha | |||
tenhamos | |||
tenham | |||
tivesse | |||
tivéssemos | |||
tivessem | |||
tiver | |||
tivermos | |||
tiverem | |||
terei | |||
terá | |||
teremos | |||
terão | |||
teria | |||
teríamos | |||
teriam |
@@ -0,0 +1,233 @@ | |||
# This file was created by Jacques Savoy and is distributed under the BSD license. | |||
# See http://members.unine.ch/jacques.savoy/clef/index.html. | |||
# Also see http://www.opensource.org/licenses/bsd-license.html | |||
acea | |||
aceasta | |||
această | |||
aceea | |||
acei | |||
aceia | |||
acel | |||
acela | |||
acele | |||
acelea | |||
acest | |||
acesta | |||
aceste | |||
acestea | |||
aceşti | |||
aceştia | |||
acolo | |||
acum | |||
ai | |||
aia | |||
aibă | |||
aici | |||
al | |||
ăla | |||
ale | |||
alea | |||
ălea | |||
altceva | |||
altcineva | |||
am | |||
ar | |||
are | |||
aş | |||
aşadar | |||
asemenea | |||
asta | |||
ăsta | |||
astăzi | |||
astea | |||
ăstea | |||
ăştia | |||
asupra | |||
aţi | |||
au | |||
avea | |||
avem | |||
aveţi | |||
azi | |||
bine | |||
bucur | |||
bună | |||
ca | |||
că | |||
căci | |||
când | |||
care | |||
cărei | |||
căror | |||
cărui | |||
cât | |||
câte | |||
câţi | |||
către | |||
câtva | |||
ce | |||
cel | |||
ceva | |||
chiar | |||
cînd | |||
cine | |||
cineva | |||
cît | |||
cîte | |||
cîţi | |||
cîtva | |||
contra | |||
cu | |||
cum | |||
cumva | |||
curând | |||
curînd | |||
da | |||
dă | |||
dacă | |||
dar | |||
datorită | |||
de | |||
deci | |||
deja | |||
deoarece | |||
departe | |||
deşi | |||
din | |||
dinaintea | |||
dintr | |||
dintre | |||
drept | |||
după | |||
ea | |||
ei | |||
el | |||
ele | |||
eram | |||
este | |||
eşti | |||
eu | |||
face | |||
fără | |||
fi | |||
fie | |||
fiecare | |||
fii | |||
fim | |||
fiţi | |||
iar | |||
ieri | |||
îi | |||
îl | |||
îmi | |||
împotriva | |||
în | |||
înainte | |||
înaintea | |||
încât | |||
încît | |||
încotro | |||
între | |||
întrucât | |||
întrucît | |||
îţi | |||
la | |||
lângă | |||
le | |||
li | |||
lîngă | |||
lor | |||
lui | |||
mă | |||
mâine | |||
mea | |||
mei | |||
mele | |||
mereu | |||
meu | |||
mi | |||
mine | |||
mult | |||
multă | |||
mulţi | |||
ne | |||
nicăieri | |||
nici | |||
nimeni | |||
nişte | |||
noastră | |||
noastre | |||
noi | |||
noştri | |||
nostru | |||
nu | |||
ori | |||
oricând | |||
oricare | |||
oricât | |||
orice | |||
oricînd | |||
oricine | |||
oricît | |||
oricum | |||
oriunde | |||
până | |||
pe | |||
pentru | |||
peste | |||
pînă | |||
poate | |||
pot | |||
prea | |||
prima | |||
primul | |||
prin | |||
printr | |||
sa | |||
să | |||
săi | |||
sale | |||
sau | |||
său | |||
se | |||
şi | |||
sînt | |||
sîntem | |||
sînteţi | |||
spre | |||
sub | |||
sunt | |||
suntem | |||
sunteţi | |||
ta | |||
tăi | |||
tale | |||
tău | |||
te | |||
ţi | |||
ţie | |||
tine | |||
toată | |||
toate | |||
tot | |||
toţi | |||
totuşi | |||
tu | |||
un | |||
una | |||
unde | |||
undeva | |||
unei | |||
unele | |||
uneori | |||
unor | |||
vă | |||
vi | |||
voastră | |||
voastre | |||
voi | |||
voştri | |||
vostru | |||
vouă | |||
vreo | |||
vreun |
@@ -0,0 +1,243 @@ | |||
| From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| a russian stop word list. comments begin with vertical bar. each stop | |||
| word is at the start of a line. | |||
| this is a ranked list (commonest to rarest) of stopwords derived from | |||
| a large text sample. | |||
| letter `ё' is translated to `е'. | |||
и | and | |||
в | in/into | |||
во | alternative form | |||
не | not | |||
что | what/that | |||
он | he | |||
на | on/onto | |||
я | i | |||
с | from | |||
со | alternative form | |||
как | how | |||
а | milder form of `no' (but) | |||
то | conjunction and form of `that' | |||
все | all | |||
она | she | |||
так | so, thus | |||
его | him | |||
но | but | |||
да | yes/and | |||
ты | thou | |||
к | towards, by | |||
у | around, chez | |||
же | intensifier particle | |||
вы | you | |||
за | beyond, behind | |||
бы | conditional/subj. particle | |||
по | up to, along | |||
только | only | |||
ее | her | |||
мне | to me | |||
было | it was | |||
вот | here is/are, particle | |||
от | away from | |||
меня | me | |||
еще | still, yet, more | |||
нет | no, there isn't/aren't | |||
о | about | |||
из | out of | |||
ему | to him | |||
теперь | now | |||
когда | when | |||
даже | even | |||
ну | so, well | |||
вдруг | suddenly | |||
ли | interrogative particle | |||
если | if | |||
уже | already, but homonym of `narrower' | |||
или | or | |||
ни | neither | |||
быть | to be | |||
был | he was | |||
него | prepositional form of его | |||
до | up to | |||
вас | you accusative | |||
нибудь | indef. suffix preceded by hyphen | |||
опять | again | |||
уж | already, but homonym of `adder' | |||
вам | to you | |||
сказал | he said | |||
ведь | particle `after all' | |||
там | there | |||
потом | then | |||
себя | oneself | |||
ничего | nothing | |||
ей | to her | |||
может | usually with `быть' as `maybe' | |||
они | they | |||
тут | here | |||
где | where | |||
есть | there is/are | |||
надо | got to, must | |||
ней | prepositional form of ей | |||
для | for | |||
мы | we | |||
тебя | thee | |||
их | them, their | |||
чем | than | |||
была | she was | |||
сам | self | |||
чтоб | in order to | |||
без | without | |||
будто | as if | |||
человек | man, person, one | |||
чего | genitive form of `what' | |||
раз | once | |||
тоже | also | |||
себе | to oneself | |||
под | beneath | |||
жизнь | life | |||
будет | will be | |||
ж | short form of intensifier particle `же' | |||
тогда | then | |||
кто | who | |||
этот | this | |||
говорил | was saying | |||
того | genitive form of `that' | |||
потому | for that reason | |||
этого | genitive form of `this' | |||
какой | which | |||
совсем | altogether | |||
ним | prepositional form of `его', `они' | |||
здесь | here | |||
этом | prepositional form of `этот' | |||
один | one | |||
почти | almost | |||
мой | my | |||
тем | instrumental/dative plural of `тот', `то' | |||
чтобы | full form of `in order that' | |||
нее | her (acc.) | |||
кажется | it seems | |||
сейчас | now | |||
были | they were | |||
куда | where to | |||
зачем | why | |||
сказать | to say | |||
всех | all (acc., gen. preposn. plural) | |||
никогда | never | |||
сегодня | today | |||
можно | possible, one can | |||
при | by | |||
наконец | finally | |||
два | two | |||
об | alternative form of `о', about | |||
другой | another | |||
хоть | even | |||
после | after | |||
над | above | |||
больше | more | |||
тот | that one (masc.) | |||
через | across, in | |||
эти | these | |||
нас | us | |||
про | about | |||
всего | in all, only, of all | |||
них | prepositional form of `они' (they) | |||
какая | which, feminine | |||
много | lots | |||
разве | interrogative particle | |||
сказала | she said | |||
три | three | |||
эту | this, acc. fem. sing. | |||
моя | my, feminine | |||
впрочем | moreover, besides | |||
хорошо | good | |||
свою | one's own, acc. fem. sing. | |||
этой | oblique form of `эта', fem. `this' | |||
перед | in front of | |||
иногда | sometimes | |||
лучше | better | |||
чуть | a little | |||
том | preposn. form of `that one' | |||
нельзя | one must not | |||
такой | such a one | |||
им | to them | |||
более | more | |||
всегда | always | |||
конечно | of course | |||
всю | acc. fem. sing of `all' | |||
между | between | |||
| b: some paradigms | |||
| | |||
| personal pronouns | |||
| | |||
| я меня мне мной [мною] | |||
| ты тебя тебе тобой [тобою] | |||
| он его ему им [него, нему, ним] | |||
| она ее ей ею [нее, ней, нею] | |||
| оно его ему им [него, нему, ним] | |||
| | |||
| мы нас нам нами | |||
| вы вас вам вами | |||
| они их им ими [них, ним, ними] | |||
| | |||
| себя себе собой [собою] | |||
| | |||
| demonstrative pronouns: этот (this), тот (that) | |||
| | |||
| этот эта это эти | |||
| этого эту это эти | |||
| этого этой этого этих | |||
| этому этой этому этим | |||
| этим этой этим [этою] этими | |||
| этом этой этом этих | |||
| | |||
| тот та то те | |||
| того ту то те | |||
| того той того тех | |||
| тому той тому тем | |||
| тем той тем [тою] теми | |||
| том той том тех | |||
| | |||
| determinative pronouns | |||
| | |||
| (a) весь (all) | |||
| | |||
| весь вся все все | |||
| всего всю все все | |||
| всего всей всего всех | |||
| всему всей всему всем | |||
| всем всей всем [всею] всеми | |||
| всем всей всем всех | |||
| | |||
| (b) сам (himself etc) | |||
| | |||
| сам сама само сами | |||
| самого саму само самих | |||
| самого самой самого самих | |||
| самому самой самому самим | |||
| самим самой самим [самою] самими | |||
| самом самой самом самих | |||
| | |||
| stems of verbs `to be', `to have', `to do' and modal | |||
| | |||
| быть бы буд быв есть суть | |||
| име | |||
| дел | |||
| мог мож мочь | |||
| уме | |||
| хоч хот | |||
| долж | |||
| можн | |||
| нужн | |||
| нельзя | |||
@@ -0,0 +1,133 @@ | |||
| From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| A Swedish stop word list. Comments begin with vertical bar. Each stop | |||
| word is at the start of a line. | |||
| This is a ranked list (commonest to rarest) of stopwords derived from | |||
| a large text sample. | |||
| Swedish stop words occasionally exhibit homonym clashes. For example | |||
| så = so, but also seed. These are indicated clearly below. | |||
och | and | |||
det | it, this/that | |||
att | to (with infinitive) | |||
i | in, at | |||
en | a | |||
jag | I | |||
hon | she | |||
som | who, that | |||
han | he | |||
på | on | |||
den | it, this/that | |||
med | with | |||
var | where, each | |||
sig | him(self) etc | |||
för | for | |||
så | so (also: seed) | |||
till | to | |||
är | is | |||
men | but | |||
ett | a | |||
om | if; around, about | |||
hade | had | |||
de | they, these/those | |||
av | of | |||
icke | not, no | |||
mig | me | |||
du | you | |||
henne | her | |||
då | then, when | |||
sin | his | |||
nu | now | |||
har | have | |||
inte | inte någon = no one | |||
hans | his | |||
honom | him | |||
skulle | should, would | |||
hennes | her | |||
där | there | |||
min | my | |||
man | one (pronoun) | |||
ej | nor | |||
vid | at, by, on (also: vast) | |||
kunde | could | |||
något | some etc | |||
från | from, off | |||
ut | out | |||
när | when | |||
efter | after, behind | |||
upp | up | |||
vi | we | |||
dem | them | |||
vara | be | |||
vad | what | |||
över | over | |||
än | than | |||
dig | you | |||
kan | can | |||
sina | his | |||
här | here | |||
ha | have | |||
mot | towards | |||
alla | all | |||
under | under (also: wonder) | |||
någon | some etc | |||
eller | or (else) | |||
allt | all | |||
mycket | much | |||
sedan | since | |||
ju | why | |||
denna | this/that | |||
själv | myself, yourself etc | |||
detta | this/that | |||
åt | to | |||
utan | without | |||
varit | was | |||
hur | how | |||
ingen | no | |||
mitt | my | |||
ni | you | |||
bli | to be, become | |||
blev | from bli | |||
oss | us | |||
din | thy | |||
dessa | these/those | |||
några | some etc | |||
deras | their | |||
blir | from bli | |||
mina | my | |||
samma | (the) same | |||
vilken | who, that | |||
er | you, your | |||
sådan | such a | |||
vår | our | |||
blivit | from bli | |||
dess | its | |||
inom | within | |||
mellan | between | |||
sådant | such a | |||
varför | why | |||
varje | each | |||
vilka | who, that | |||
ditt | thy | |||
vem | who | |||
vilket | who, that | |||
sitt | his | |||
sådana | such a | |||
vart | each | |||
dina | thy | |||
vars | whose | |||
vårt | our | |||
våra | our | |||
ert | your | |||
era | your | |||
vilkas | whose | |||
@@ -0,0 +1,119 @@ | |||
# Thai stopwords from: | |||
# "Opinion Detection in Thai Political News Columns | |||
# Based on Subjectivity Analysis" | |||
# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak | |||
ไว้ | |||
ไม่ | |||
ไป | |||
ได้ | |||
ให้ | |||
ใน | |||
โดย | |||
แห่ง | |||
แล้ว | |||
และ | |||
แรก | |||
แบบ | |||
แต่ | |||
เอง | |||
เห็น | |||
เลย | |||
เริ่ม | |||
เรา | |||
เมื่อ | |||
เพื่อ | |||
เพราะ | |||
เป็นการ | |||
เป็น | |||
เปิดเผย | |||
เปิด | |||
เนื่องจาก | |||
เดียวกัน | |||
เดียว | |||
เช่น | |||
เฉพาะ | |||
เคย | |||
เข้า | |||
เขา | |||
อีก | |||
อาจ | |||
อะไร | |||
ออก | |||
อย่าง | |||
อยู่ | |||
อยาก | |||
หาก | |||
หลาย | |||
หลังจาก | |||
หลัง | |||
หรือ | |||
หนึ่ง | |||
ส่วน | |||
ส่ง | |||
สุด | |||
สําหรับ | |||
ว่า | |||
วัน | |||
ลง | |||
ร่วม | |||
ราย | |||
รับ | |||
ระหว่าง | |||
รวม | |||
ยัง | |||
มี | |||
มาก | |||
มา | |||
พร้อม | |||
พบ | |||
ผ่าน | |||
ผล | |||
บาง | |||
น่า | |||
นี้ | |||
นํา | |||
นั้น | |||
นัก | |||
นอกจาก | |||
ทุก | |||
ที่สุด | |||
ที่ | |||
ทําให้ | |||
ทํา | |||
ทาง | |||
ทั้งนี้ | |||
ทั้ง | |||
ถ้า | |||
ถูก | |||
ถึง | |||
ต้อง | |||
ต่างๆ | |||
ต่าง | |||
ต่อ | |||
ตาม | |||
ตั้งแต่ | |||
ตั้ง | |||
ด้าน | |||
ด้วย | |||
ดัง | |||
ซึ่ง | |||
ช่วง | |||
จึง | |||
จาก | |||
จัด | |||
จะ | |||
คือ | |||
ความ | |||
ครั้ง | |||
คง | |||
ขึ้น | |||
ของ | |||
ขอ | |||
ขณะ | |||
ก่อน | |||
ก็ | |||
การ | |||
กับ | |||
กัน | |||
กว่า | |||
กล่าว |
@@ -0,0 +1,212 @@ | |||
# Turkish stopwords from LUCENE-559 | |||
# merged with the list from "Information Retrieval on Turkish Texts" | |||
# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) | |||
acaba | |||
altmış | |||
altı | |||
ama | |||
ancak | |||
arada | |||
aslında | |||
ayrıca | |||
bana | |||
bazı | |||
belki | |||
ben | |||
benden | |||
beni | |||
benim | |||
beri | |||
beş | |||
bile | |||
bin | |||
bir | |||
birçok | |||
biri | |||
birkaç | |||
birkez | |||
birşey | |||
birşeyi | |||
biz | |||
bize | |||
bizden | |||
bizi | |||
bizim | |||
böyle | |||
böylece | |||
bu | |||
buna | |||
bunda | |||
bundan | |||
bunlar | |||
bunları | |||
bunların | |||
bunu | |||
bunun | |||
burada | |||
çok | |||
çünkü | |||
da | |||
daha | |||
dahi | |||
de | |||
defa | |||
değil | |||
diğer | |||
diye | |||
doksan | |||
dokuz | |||
dolayı | |||
dolayısıyla | |||
dört | |||
edecek | |||
eden | |||
ederek | |||
edilecek | |||
ediliyor | |||
edilmesi | |||
ediyor | |||
eğer | |||
elli | |||
en | |||
etmesi | |||
etti | |||
ettiği | |||
ettiğini | |||
gibi | |||
göre | |||
halen | |||
hangi | |||
hatta | |||
hem | |||
henüz | |||
hep | |||
hepsi | |||
her | |||
herhangi | |||
herkesin | |||
hiç | |||
hiçbir | |||
için | |||
iki | |||
ile | |||
ilgili | |||
ise | |||
işte | |||
itibaren | |||
itibariyle | |||
kadar | |||
karşın | |||
katrilyon | |||
kendi | |||
kendilerine | |||
kendini | |||
kendisi | |||
kendisine | |||
kendisini | |||
kez | |||
ki | |||
kim | |||
kimden | |||
kime | |||
kimi | |||
kimse | |||
kırk | |||
milyar | |||
milyon | |||
mu | |||
mü | |||
mı | |||
nasıl | |||
ne | |||
neden | |||
nedenle | |||
nerde | |||
nerede | |||
nereye | |||
niye | |||
niçin | |||
o | |||
olan | |||
olarak | |||
oldu | |||
olduğu | |||
olduğunu | |||
olduklarını | |||
olmadı | |||
olmadığı | |||
olmak | |||
olması | |||
olmayan | |||
olmaz | |||
olsa | |||
olsun | |||
olup | |||
olur | |||
olursa | |||
oluyor | |||
on | |||
ona | |||
ondan | |||
onlar | |||
onlardan | |||
onları | |||
onların | |||
onu | |||
onun | |||
otuz | |||
oysa | |||
öyle | |||
pek | |||
rağmen | |||
sadece | |||
sanki | |||
sekiz | |||
seksen | |||
sen | |||
senden | |||
seni | |||
senin | |||
siz | |||
sizden | |||
sizi | |||
sizin | |||
şey | |||
şeyden | |||
şeyi | |||
şeyler | |||
şöyle | |||
şu | |||
şuna | |||
şunda | |||
şundan | |||
şunları | |||
şunu | |||
tarafından | |||
trilyon | |||
tüm | |||
üç | |||
üzere | |||
var | |||
vardı | |||
ve | |||
veya | |||
ya | |||
yani | |||
yapacak | |||
yapılan | |||
yapılması | |||
yapıyor | |||
yapmak | |||
yaptı | |||
yaptığı | |||
yaptığını | |||
yaptıkları | |||
yedi | |||
yerine | |||
yetmiş | |||
yine | |||
yirmi | |||
yoksa | |||
yüz | |||
zaten |
@@ -0,0 +1,29 @@ | |||
# | |||
# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) | |||
# | |||
# Add entries to this file in order to override the statistical model in terms | |||
# of segmentation, readings and part-of-speech tags. Notice that entries do | |||
# not have weights since they are always used when found. This is by-design | |||
# in order to maximize ease-of-use. | |||
# | |||
# Entries are defined using the following CSV format: | |||
# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> | |||
# | |||
# Notice that a single half-width space separates tokens and readings, and | |||
# that the number of tokens and readings must match exactly. | |||
# | |||
# Also notice that the behavior of multiple entries with the same <text> is undefined. | |||
# | |||
# Whitespace only lines are ignored. Comments are not allowed on entry lines. | |||
# | |||
# Custom segmentation for kanji compounds | |||
日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 | |||
関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 | |||
# Custom segmentation for compound katakana | |||
トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 | |||
ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 | |||
# Custom reading for former sumo wrestler | |||
朝青龍,朝青龍,アサショウリュウ,カスタム人名 |
@@ -0,0 +1,34 @@ | |||
{"params":{ | |||
"query":{ | |||
"defType":"edismax", | |||
"q.alt":"*:*", | |||
"rows":"10", | |||
"fl":"*,score", | |||
"":{"v":0}}, | |||
"facets":{ | |||
"facet":"on", | |||
"facet.mincount":"1", | |||
"f.doc_type.facet.mincount":"0", | |||
"facet.field":["text_shingles","{!ex=type}doc_type", "language"], | |||
"f.text_shingles.facet.limit":10, | |||
"facet.query":"{!ex=type key=all_types}*:*", | |||
"f.doc_type.facet.missing":true, | |||
"":{"v":0}}, | |||
"browse":{ | |||
"type_fq":"{!field f=doc_type v=$type}", | |||
"hl":"on", | |||
"hl.fl":"content", | |||
"v.locale":"${locale}", | |||
"debug":"true", | |||
"hl.simple.pre":"HL_START", | |||
"hl.simple.post":"HL_END", | |||
"echoParams": "explicit", | |||
"_appends_": { | |||
"fq": "{!switch v=$type tag=type case='*:*' case.all='*:*' case.unknown='-doc_type:[* TO *]' default=$type_fq}" | |||
}, | |||
"":{"v":0}}, | |||
"velocity":{ | |||
"wt":"velocity", | |||
"v.template":"browse", | |||
"v.layout":"layout", | |||
"":{"v":0}}}} |
@@ -0,0 +1,21 @@ | |||
# The ASF licenses this file to You under the Apache License, Version 2.0 | |||
# (the "License"); you may not use this file except in compliance with | |||
# the License. You may obtain a copy of the License at | |||
# | |||
# http://www.apache.org/licenses/LICENSE-2.0 | |||
# | |||
# Unless required by applicable law or agreed to in writing, software | |||
# distributed under the License is distributed on an "AS IS" BASIS, | |||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
# See the License for the specific language governing permissions and | |||
# limitations under the License. | |||
#----------------------------------------------------------------------- | |||
# Use a protected word file to protect against the stemmer reducing two | |||
# unrelated words to the same base word. | |||
# Some non-words that normally won't be encountered, | |||
# just to test that they won't be stemmed. | |||
dontstems | |||
zwhacky | |||
@@ -0,0 +1,530 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<!-- Solr managed schema - automatically generated - DO NOT EDIT --> | |||
<schema name="example-data-driven-schema" version="1.6"> | |||
<uniqueKey>id</uniqueKey> | |||
<fieldType name="ancestor_path" class="solr.TextField"> | |||
<analyzer type="index"> | |||
<tokenizer class="solr.KeywordTokenizerFactory"/> | |||
</analyzer> | |||
<analyzer type="query"> | |||
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="binary" class="solr.BinaryField"/> | |||
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> | |||
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/> | |||
<fieldType name="currency" class="solr.CurrencyFieldType" amountLongSuffix="_l_ns" codeStrSuffix="_s_ns" defaultCurrency="USD" currencyConfig="currency.xml" /> | |||
<fieldType name="descendent_path" class="solr.TextField"> | |||
<analyzer type="index"> | |||
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/> | |||
</analyzer> | |||
<analyzer type="query"> | |||
<tokenizer class="solr.KeywordTokenizerFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/> | |||
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/> | |||
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" geo="true" maxDistErr="0.001" distErrPct="0.025" distanceUnits="kilometers"/> | |||
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.KeywordTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="phonetic_en" class="solr.TextField" indexed="true" stored="false"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/> | |||
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> | |||
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> | |||
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> | |||
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/> | |||
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> | |||
<fieldType name="pint" class="solr.IntPointField" docValues="true"/> | |||
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> | |||
<fieldType name="plong" class="solr.LongPointField" docValues="true"/> | |||
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> | |||
<fieldType name="point" class="solr.PointType" subFieldSuffix="_d" dimension="2"/> | |||
<fieldType name="random" class="solr.RandomSortField" indexed="true"/> | |||
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/> | |||
<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true"/> | |||
<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ar.txt" ignoreCase="true"/> | |||
<filter class="solr.ArabicNormalizationFilterFactory"/> | |||
<filter class="solr.ArabicStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_bg.txt" ignoreCase="true"/> | |||
<filter class="solr.BulgarianStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_ca.txt" ignoreCase="true"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ca.txt" ignoreCase="true"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.CJKWidthFilterFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.CJKBigramFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_cz.txt" ignoreCase="true"/> | |||
<filter class="solr.CzechStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_da.txt" ignoreCase="true"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Danish"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_de.txt" ignoreCase="true"/> | |||
<filter class="solr.GermanNormalizationFilterFactory"/> | |||
<filter class="solr.GermanLightStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.GreekLowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_el.txt" ignoreCase="false"/> | |||
<filter class="solr.GreekStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer type="index"> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.EnglishPossessiveFilterFactory"/> | |||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||
<filter class="solr.PorterStemFilterFactory"/> | |||
</analyzer> | |||
<analyzer type="query"> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.EnglishPossessiveFilterFactory"/> | |||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||
<filter class="solr.PorterStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_en_splitting" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100"> | |||
<analyzer type="index"> | |||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||
<filter class="solr.PorterStemFilterFactory"/> | |||
<filter class="solr.FlattenGraphFilterFactory" /> | |||
</analyzer> | |||
<analyzer type="query"> | |||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||
<filter class="solr.PorterStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_en_splitting_tight" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100"> | |||
<analyzer type="index"> | |||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||
<filter class="solr.SynonymGraphFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||
<filter class="solr.EnglishMinimalStemFilterFactory"/> | |||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |||
<filter class="solr.FlattenGraphFilterFactory" /> | |||
</analyzer> | |||
<analyzer type="query"> | |||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||
<filter class="solr.SynonymGraphFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> | |||
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |||
<filter class="solr.EnglishMinimalStemFilterFactory"/> | |||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_es.txt" ignoreCase="true"/> | |||
<filter class="solr.SpanishLightStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_eu.txt" ignoreCase="true"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Basque"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<charFilter class="solr.PersianCharFilterFactory"/> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.ArabicNormalizationFilterFactory"/> | |||
<filter class="solr.PersianNormalizationFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_fa.txt" ignoreCase="true"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fi.txt" ignoreCase="true"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_fr.txt" ignoreCase="true"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fr.txt" ignoreCase="true"/> | |||
<filter class="solr.FrenchLightStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_ga.txt" ignoreCase="true"/> | |||
<filter class="solr.StopFilterFactory" words="lang/hyphenations_ga.txt" ignoreCase="true"/> | |||
<filter class="solr.IrishLowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ga.txt" ignoreCase="true"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Irish"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true"> | |||
<analyzer type="index"> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
</analyzer> | |||
<analyzer type="query"> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer type="index"> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.ReversedWildcardFilterFactory" maxPosQuestion="2" maxFractionAsterisk="0.33" maxPosAsterisk="3" withOriginal="true"/> | |||
</analyzer> | |||
<analyzer type="query"> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> | |||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_gl.txt" ignoreCase="true"/> | |||
<filter class="solr.GalicianStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.IndicNormalizationFilterFactory"/> | |||
<filter class="solr.HindiNormalizationFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_hi.txt" ignoreCase="true"/> | |||
<filter class="solr.HindiStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_hu.txt" ignoreCase="true"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_hy.txt" ignoreCase="true"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_id.txt" ignoreCase="true"/> | |||
<filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt" ignoreCase="true"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_it.txt" ignoreCase="true"/> | |||
<filter class="solr.ItalianLightStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_ja" class="solr.TextField" autoGeneratePhraseQueries="false" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> | |||
<filter class="solr.JapaneseBaseFormFilterFactory"/> | |||
<filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt"/> | |||
<filter class="solr.CJKWidthFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ja.txt" ignoreCase="true"/> | |||
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/> | |||
<filter class="solr.KoreanPartOfSpeechStopFilterFactory" /> | |||
<filter class="solr.KoreanReadingFormFilterFactory" /> | |||
<filter class="solr.LowerCaseFilterFactory" /> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_lv.txt" ignoreCase="true"/> | |||
<filter class="solr.LatvianStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_nl.txt" ignoreCase="true"/> | |||
<filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_no.txt" ignoreCase="true"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_pt.txt" ignoreCase="true"/> | |||
<filter class="solr.PortugueseLightStemFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ro.txt" ignoreCase="true"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_ru.txt" ignoreCase="true"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Russian"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_sv.txt" ignoreCase="true"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.ThaiTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_th.txt" ignoreCase="true"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<filter class="solr.TurkishLowerCaseFilterFactory"/> | |||
<filter class="solr.StopFilterFactory" words="lang/stopwords_tr.txt" ignoreCase="false"/> | |||
<filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_email_url" class="solr.TextField"> | |||
<analyzer> | |||
<tokenizer class="solr.UAX29URLEmailTokenizerFactory"/> | |||
<filter class="solr.TypeTokenFilterFactory" types="email_url_types.txt" useWhitelist="true"/> | |||
</analyzer> | |||
</fieldType> | |||
<fieldType name="text_shingles" class="solr.TextField" positionIncrementGap="100" multiValued="true"> | |||
<analyzer type="index"> | |||
<tokenizer class="solr.StandardTokenizerFactory"/> | |||
<!-- <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="false" /> --> | |||
<filter class="solr.LengthFilterFactory" min="2" max="18"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
<filter class="solr.PatternReplaceFilterFactory" pattern="(^[^a-z]+$)" replacement="" replace="all"/> | |||
<filter class="solr.ShingleFilterFactory" minShingleSize="3" maxShingleSize="3" | |||
outputUnigrams="false" outputUnigramsIfNoShingles="false" tokenSeparator=" " fillerToken="*"/> | |||
<filter class="solr.PatternReplaceFilterFactory" pattern="(.*[\*].*)" replacement=""/> | |||
<filter class="solr.TrimFilterFactory"/> | |||
<!-- The PatternReplaceFilterFactory above may have reduced a token to an empty string; the length filter below drops such tokens --> | |||
<filter class="solr.LengthFilterFactory" min="1" max="100"/> | |||
</analyzer> | |||
<analyzer type="query"> | |||
<tokenizer class="solr.KeywordTokenizerFactory"/> | |||
<filter class="solr.LowerCaseFilterFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<field name="id" type="string" multiValued="false" indexed="true" required="true" stored="true"/> | |||
<field name="_version_" type="plong" indexed="true" stored="true"/> | |||
<field name="content_type" type="string" indexed="true" stored="true"/> | |||
<field name="doc_type" type="string" indexed="true" stored="true"/> | |||
<field name="title" type="string" indexed="true" stored="true"/> | |||
<field name="language" type="string" indexed="true" stored="true"/> | |||
<field name="content" type="text_general" multiValued="false" indexed="true" stored="true"/> | |||
<field name="text_shingles" type="text_shingles" indexed="true" stored="false"/> | |||
<field name="_text_" type="text_general" multiValued="true" indexed="true" stored="false"/> | |||
<dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/> | |||
<dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/> | |||
<dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/> | |||
<dynamicField name="*_coordinate" type="pdouble" indexed="true" stored="false"/> | |||
<dynamicField name="ignored_*" type="ignored" multiValued="true"/> | |||
<dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/> | |||
<dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/> | |||
<dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_cjk" type="text_cjk" indexed="true" stored="true"/> | |||
<dynamicField name="random_*" type="random"/> | |||
<dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_ar" type="text_ar" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_bg" type="text_bg" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_ca" type="text_ca" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_cz" type="text_cz" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_da" type="text_da" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_de" type="text_de" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_el" type="text_el" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_es" type="text_es" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_eu" type="text_eu" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_fa" type="text_fa" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_fi" type="text_fi" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_fr" type="text_fr" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_ga" type="text_ga" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_gl" type="text_gl" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_hi" type="text_hi" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_hu" type="text_hu" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_hy" type="text_hy" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_id" type="text_id" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_it" type="text_it" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_ja" type="text_ja" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_nl" type="text_nl" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_no" type="text_no" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_ro" type="text_ro" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_ru" type="text_ru" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_sv" type="text_sv" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_th" type="text_th" indexed="true" stored="true"/> | |||
<dynamicField name="*_txt_tr" type="text_tr" indexed="true" stored="true"/> | |||
<dynamicField name="*_point" type="point" indexed="true" stored="true"/> | |||
<dynamicField name="*_srpt" type="location_rpt" indexed="true" stored="true"/> | |||
<dynamicField name="attr_*" type="text_general" multiValued="true" indexed="true" stored="true"/> | |||
<dynamicField name="*_l_ns" type="plong" indexed="true" stored="false"/> | |||
<dynamicField name="*_s_ns" type="string" indexed="true" stored="false"/> | |||
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/> | |||
<dynamicField name="*_dts" type="pdate" multiValued="true" indexed="true" stored="true"/> | |||
<dynamicField name="*_is" type="pints" indexed="true" stored="true"/> | |||
<dynamicField name="*_ss" type="strings" indexed="true" stored="true"/> | |||
<dynamicField name="*_ls" type="plongs" indexed="true" stored="true"/> | |||
<dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/> | |||
<dynamicField name="*_fs" type="pfloats" indexed="true" stored="true"/> | |||
<dynamicField name="*_ds" type="pdoubles" indexed="true" stored="true"/> | |||
<dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/> | |||
<dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/> | |||
<dynamicField name="*_i" type="pint" indexed="true" stored="true"/> | |||
<dynamicField name="*_s" type="string" indexed="true" stored="true"/> | |||
<dynamicField name="*_l" type="plong" indexed="true" stored="true"/> | |||
<dynamicField name="*_t" type="text_general" indexed="true" stored="true"/> | |||
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> | |||
<dynamicField name="*_f" type="pfloat" indexed="true" stored="true"/> | |||
<dynamicField name="*_d" type="pdouble" indexed="true" stored="true"/> | |||
<dynamicField name="*_p" type="location" indexed="true" stored="true"/> | |||
<dynamicField name="*_c" type="currency" indexed="true" stored="true"/> | |||
<copyField source="content" dest="text_shingles"/> | |||
<copyField source="*" dest="_text_"/> | |||
<!-- BEGIN custom "year" field - ADDED BY SIMON BOWIE 2022-04-04 --> | |||
<!-- NOTE(review): the header of this file marks it as auto-generated; presumably a Schema API write would drop these comments - confirm before relying on them --> | |||
<!-- Field type with a single analysis step: a regex tokenizer that emits capture group 1 (the four digits) of each pattern match as a token --> | |||
<fieldType class="solr.TextField" name="year" positionIncrementGap="100"> | |||
<analyzer> | |||
<tokenizer group="1" pattern="=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})" class="solr.PatternTokenizerFactory"/> | |||
</analyzer> | |||
</fieldType> | |||
<!-- The "year" field itself: indexed and stored, analysed with the type declared above --> | |||
<field type="year" name="year" stored="true" indexed="true"/> | |||
<!-- Feed "year" from the "content" field. NOTE(review): copyField copies the raw source text, so the stored value is presumably the full content rather than the extracted digits - verify what the result pages display --> | |||
<copyField dest="year" source="content"/> | |||
<!-- END custom "year" field --> | |||
</schema> |
@@ -0,0 +1,14 @@ | |||
# Licensed to the Apache Software Foundation (ASF) under one or more | |||
# contributor license agreements. See the NOTICE file distributed with | |||
# this work for additional information regarding copyright ownership. | |||
# The ASF licenses this file to You under the Apache License, Version 2.0 | |||
# (the "License"); you may not use this file except in compliance with | |||
# the License. You may obtain a copy of the License at | |||
# | |||
# http://www.apache.org/licenses/LICENSE-2.0 | |||
# | |||
# Unless required by applicable law or agreed to in writing, software | |||
# distributed under the License is distributed on an "AS IS" BASIS, | |||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
# See the License for the specific language governing permissions and | |||
# limitations under the License. |
@@ -0,0 +1,29 @@ | |||
# The ASF licenses this file to You under the Apache License, Version 2.0 | |||
# (the "License"); you may not use this file except in compliance with | |||
# the License. You may obtain a copy of the License at | |||
# | |||
# http://www.apache.org/licenses/LICENSE-2.0 | |||
# | |||
# Unless required by applicable law or agreed to in writing, software | |||
# distributed under the License is distributed on an "AS IS" BASIS, | |||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
# See the License for the specific language governing permissions and | |||
# limitations under the License. | |||
#----------------------------------------------------------------------- | |||
#some test synonym mappings unlikely to appear in real input text | |||
aaafoo => aaabar | |||
bbbfoo => bbbfoo bbbbar | |||
cccfoo => cccbar cccbaz | |||
fooaaa,baraaa,bazaaa | |||
# Some synonym groups specific to this example | |||
GB,gib,gigabyte,gigabytes | |||
MB,mib,megabyte,megabytes | |||
Television, Televisions, TV, TVs | |||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming | |||
#after us won't split it into two words. | |||
# Synonym mappings can be used for spelling correction too | |||
pixima => pixma | |||
@@ -0,0 +1,115 @@ | |||
/**
 * Resolves a fully-qualified Java class name to the class object exposed by
 * the hosting script engine.
 *
 * The Nashorn (Java 8) lookup is attempted first; if evaluating it throws,
 * the Rhino (Java 7) "Packages." lookup is used instead. An error raised by
 * the fallback lookup propagates to the caller.
 *
 * @param name fully-qualified Java class name
 * @return whatever the engine-specific lookup evaluates to
 */
function get_class(name) {
  try {
    // Java8 Nashorn
    return eval("Java.type(name).class");
  } catch (e) {
    // Java7 Rhino
    return eval("Packages." + name);
  }
}
/**
 * Update-processor hook invoked for every document being added.
 *
 * It does two things:
 *  1. Derives simplified type fields from "content_type": a coarse "doc_type"
 *     (doc, html, image, ...) plus "content_type_type_s" and
 *     "content_type_subtype_s" (the parts before and after the slash).
 *  2. Runs the "content" field through the index analyzer of the
 *     "text_email_url" field type and adds every token to a multi-valued
 *     "<token type>_ss" field.
 *
 * Relies on the engine-provided globals `logger` and `req`, and on the
 * get_class() helper defined in this script.
 *
 * @param cmd add command; cmd.solrDoc is the
 *            org.apache.solr.common.SolrInputDocument being indexed
 */
function processAdd(cmd) {
  // Declared locally so the document does not leak into the shared script scope.
  var doc = cmd.solrDoc;  // org.apache.solr.common.SolrInputDocument
  var id = doc.getFieldValue("id");
  logger.info("update-script#processAdd: id=" + id);

  // The idea here is to use the file's content_type value to
  // simplify into user-friendly values, such that types of, say, image/jpeg and image/tiff
  // are in an "Images" facet
  var ct = doc.getFieldValue("content_type");
  if (ct) {
    // strip off semicolon onward (parameters such as "; charset=UTF-8")
    var semicolon_index = ct.indexOf(';');
    if (semicolon_index != -1) {
      ct = ct.substring(0, semicolon_index);
    }

    var doc_type;
    // switch(true) picks the first case whose expression evaluates to true.
    switch (true) {
      case /^application\/rtf/.test(ct) || /wordprocessing/.test(ct):
        doc_type = "doc";
        break;
      case /html/.test(ct):
        doc_type = "html";
        break;
      case /^image\/.*/.test(ct):
        doc_type = "image";
        break;
      case /presentation|powerpoint/.test(ct):
        doc_type = "presentation";
        break;
      case /spreadsheet|excel/.test(ct):
        doc_type = "spreadsheet";
        break;
      case /^application\/pdf/.test(ct):
        doc_type = "pdf";
        break;
      case /^text\/plain/.test(ct):
        doc_type = "text";
        break;
      default:
        break;
    }
    if (doc_type) { doc.setField("doc_type", doc_type); }

    // Split type/subtype only when a slash is present. Without this guard a
    // malformed value produced an empty type and the whole value as subtype.
    var slash_index = ct.indexOf('/');
    if (slash_index != -1) {
      doc.setField("content_type_type_s", ct.substring(0, slash_index));
      doc.setField("content_type_subtype_s", ct.substring(slash_index + 1));
    }
  }

  var content = doc.getFieldValue("content");
  if (!content) {
    return; // No content found, so we are done here
  }

  var analyzer =
      req.getCore().getLatestSchema()
         .getFieldTypeByName("text_email_url")
         .getIndexAnalyzer();

  var token_stream = analyzer.tokenStream("content", content);
  try {
    var term_att = token_stream.getAttribute(get_class("org.apache.lucene.analysis.tokenattributes.CharTermAttribute"));
    var type_att = token_stream.getAttribute(get_class("org.apache.lucene.analysis.tokenattributes.TypeAttribute"));
    token_stream.reset();
    while (token_stream.incrementToken()) {
      // Token types look like "<EMAIL>"; strip the angle brackets and lower-case
      // them to build the destination field name, e.g. "email_ss".
      doc.addField(type_att.type().replace(/\<|\>/g, '').toLowerCase() + "_ss", term_att.toString());
    }
    token_stream.end();
  } finally {
    // Always release the stream, even when analysis fails part-way through.
    token_stream.close();
  }
}
// The remaining update-processor hooks are intentionally empty: this script
// only acts on documents being added.

/** Delete commands are left untouched. */
function processDelete(cmd) {}

/** Merge-index commands are left untouched. */
function processMergeIndexes(cmd) {}

/** Commit commands are left untouched. */
function processCommit(cmd) {}

/** Rollback commands are left untouched. */
function processRollback(cmd) {}

/** Nothing to clean up. */
function finish() {}
@@ -0,0 +1,32 @@ | |||
## Search box, active filter-query constraints and the result list container.
<div id="query-box">
  <form id="query-form" action="#{url_for_home}" method="GET">
    $resource.find:
    <input type="text" id="q" name="q" style="width: 50%" value="$!esc.html($request.params.get('q'))"/>
    <input type="submit" value="$resource.submit"/>
    <div id="debug_query" class="debug">
      <span id="parsed_query">$esc.html($response.response.debug.parsedquery)</span>
    </div>
    <input type="hidden" name="type" value="#current_type"/>
    ## The locale has to be submitted under its own parameter name; the field
    ## previously carried two value attributes and no name, so it was never sent.
    #if("#current_locale"!="")<input type="hidden" name="locale" value="#current_locale"/>#end
    ## Carry every active filter query over to the next search.
    #foreach($fq in $response.responseHeader.params.getAll("fq"))
      <input type="hidden" name="fq" id="allFQs" value="$esc.html($fq)"/>
    #end
  </form>
  <div id="constraints">
    ## One entry per active filter; the "x" link keeps only the filters that precede it.
    #foreach($fq in $response.responseHeader.params.getAll("fq"))
      #set($previous_fq_count=$velocityCount - 1)
      #if($fq != '')
        ## The filter comes straight from the request, so it is HTML-escaped before display.
        &gt; $esc.html($fq)<a href="#url_for_filters($response.responseHeader.params.fq.subList(0,$previous_fq_count))">x</a>
      #end
    #end
  </div>
</div>
<div id="browse_results">
  #parse("results.vm")
</div>
@@ -0,0 +1,2 @@ | |||
## intentionally empty | |||
@@ -0,0 +1,12 @@ | |||
## Renders the values of one facet field as a tag cloud; each entry links to
## the corresponding facet filter and carries its count as data-weight.
<div id="facet_$field.name">
  <span class="facet-field">$resource.facet.top_phrases</span><br/>
  <ul id="tagcloud">
    #foreach($facet in $sort.sort($field.values,"name"))
      <li data-weight="$math.mul($facet.count,1)">
        ## Facet values come from indexed content, so escape them like the other templates do.
        <a href="#url_for_facet_filter($field.name, $facet.name)">$esc.html($facet.name)</a>
      </li>
    #end
  </ul>
</div>
@@ -0,0 +1,24 @@ | |||
## Renders a facet block for every facet field in the response that has at least one value.
#if($response.facetFields.size() > 0) | |||
#foreach($field in $response.facetFields) | |||
#if($field.values.size() > 0) | |||
## A field-specific template named facet_<field name>.vm is used when it exists.
#if($engine.resourceExists("facet_${field.name}.vm")) | |||
#parse("facet_${field.name}.vm") | |||
#else | |||
## Generic rendering: a labelled list of value links, each followed by its count.
<div id="facet_$field.name" class="facet_field"> | |||
<span class="facet-field">#label("facet.${field.name}",$field.name)</span><br/> | |||
<ul> | |||
#foreach($facet in $field.values) | |||
## A null facet name is shown as "missing".
<li><a href="#url_for_facet_filter($field.name, $facet.name)">#if($facet.name!=$null)#label("${field.name}.${facet.name}","${field.name}.${facet.name}")#else<em>missing</em>#end</a> ($facet.count)</li> | |||
#end | |||
</ul> | |||
</div> | |||
#end | |||
#end | |||
#end ## end foreach facet field | |||
#end ## end if facetFields > 0 | |||
@@ -0,0 +1,29 @@ | |||
## Page footer: admin link, debug toggle, XML view of the current search, and a
## debug panel (elements with class "debug") showing the escaped request and debug output.
<hr/> | |||
<div> | |||
<div id="admin"><a href="#url_root/index.html#/#{core_name}">Solr Admin</a></div> | |||
## Toggles every element with class "debug" on the page.
<a href="#" onclick='jQuery(".debug").toggle(); return false;'>toggle debug mode</a> | |||
## Same search rendered as XML; debug=true is appended when $debug is set.
<a href="#url_for_lens&wt=xml#if($debug)&debug=true#end">XML results</a> ## TODO: Add links for other formats, maybe dynamically? | |||
</div> | |||
<div> | |||
<a href="http://lucene.apache.org/solr">Solr Home Page</a> | |||
</div> | |||
<div class="debug"> | |||
<hr/> | |||
Request: | |||
<pre> | |||
$esc.html($request) | |||
</pre> | |||
<hr/> | |||
Debug: | |||
<pre> | |||
$esc.html($response.response.debug) | |||
</pre> | |||
</div> | |||
@@ -0,0 +1,290 @@ | |||
<title>Solr browse: #core_name</title> | |||
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/> | |||
<link rel="icon" type="image/x-icon" href="#{url_root}/img/favicon.ico"/> | |||
<link rel="shortcut icon" type="image/x-icon" href="#{url_root}/img/favicon.ico"/> | |||
<script type="text/javascript" src="#{url_root}/libs/jquery-3.4.1.min.js"></script> | |||
<script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/js/jquery.tx3-tag-cloud.js&contentType=text/javascript"></script> | |||
<script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/js/dropit.js&contentType=text/javascript"></script> | |||
<script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/js/jquery.autocomplete.js&contentType=text/javascript"></script> | |||
<script type="text/javascript">
  // Page behaviour: tag cloud, drop-down menus, live result refresh and
  // query suggestions for the search box.
  $(document).ready(function() {
    $("#tagcloud").tx3TagCloud({
      multiplier: 1
    });
    $('.menu').dropit();

    // Result fragments are loaded via AJAX, so rebuild the tag cloud afterwards.
    $( document ).ajaxComplete(function() {
      $("#tagcloud").tx3TagCloud({
        multiplier: 5
      });
    });

    // Refresh the result list while the user types. The query is a parameter
    // value, so it needs encodeURIComponent: encodeURI leaves characters such
    // as the ampersand, plus and hash unescaped, which corrupts the request.
    $('\#q').keyup(function() {
      $('#browse_results').load('#{url_for_home}?#lensNoQ&v.layout.enabled=false&v.template=results&q='+encodeURIComponent($('\#q').val()));
    });

    // Attach suggestions and the Enter-to-submit handler exactly once. They
    // used to be attached inside the keyup handler, which stacked another set
    // of handlers on every key stroke. This is deliberately the last step, so
    // a failure in the plugin cannot stop the handlers above from registering.
    $("\#q").autocomplete('#{url_for_solr}/suggest', {
      extraParams: {
        'suggest.q': function() { return $("\#q").val();},
        'suggest.build': 'true',
        'wt': 'json',
      }
    }).keydown(function(e) {
      if (e.keyCode === 13){
        $("#query-form").trigger('submit');
      }
    });
  });
</script>
<style>
  /* Styles for the Solr browse UI: page chrome, search box, tabs, pagination,
     result cards, facets, tag cloud, drop-down menus and autocomplete. */
  html {
    background-color: #F0F8FF;
  }
  body {
    font-family: Helvetica, Arial, sans-serif;
    font-size: 10pt;
  }
  #header {
    width: 100%;
    font-size: 20pt;
  }
  #header2 {
    margin-left:1200px;
  }
  #logo {
    width: 115px;
    margin: 0px 0px 0px 0px;
    border-style: none;
  }
  a {
    color: #305CB3;
  }
  a.hidden {
    display:none;
  }
  em {
    color: #FF833D;
  }
  .error {
    color: white;
    background-color: red;
    left: 210px;
    width:80%;
    position: relative;
  }
  .debug { display: none; font-size: 10pt}
  #debug_query {
    font-family: Helvetica, Arial, sans-serif;
    font-size: 10pt;
    font-weight: bold;
  }
  #parsed_query {
    /* "monospace" is the generic family keyword; "monospaced" is not one. */
    font-family: Courier, Courier New, monospace;
    font-size: 10pt;
    font-weight: normal;
  }
  #admin {
    text-align: right;
    vertical-align: top;
  }
  #query-form {
    width: 90%;
  }
  #query-box {
    padding: 5px;
    margin: 5px;
    font-weight: normal;
    font-size: 24px;
    letter-spacing: 0.08em;
  }
  #constraints {
    margin: 10px;
  }
  #tabs { }
  #tabs li { display: inline; font-size: 10px;}
  #tabs li a { border-radius: 20px; border: 2px solid #C1CDCD; padding: 10px;color: #42454a; background-color: #dedbde;}
  #tabs li a:hover { background-color: #f1f0ee; }
  #tabs li a.selected { color: #000; background-color: #f1f0ee; font-weight: bold; padding: 5px }
  #tabs li a.no_results { color: #000; background-color: #838B8B; font-style: italic; padding: 5px; pointer-events: none;
    cursor: default; text-decoration: none;}
  .pagination {
    width: 305px;
    border-radius: 25px;
    border: 2px solid #C1CDCD;
    padding: 20px;
    padding-left: 10%;
    background: #eee;
    margin-left: 190px;
    margin-top : 42px;
    padding-top: 5px;
    padding-bottom: 5px;
    text-align:left;
  }
  #results_list { width: 70%; }
  .result-document {
    border-radius: 25px;
    border: 2px solid #C1CDCD;
    padding: 10px;
    /* Disabled declarations, kept for reference ("//" is not a CSS comment): */
    /* width: 800px; */
    /* height: 120px; */
    margin: 5px;
    /* margin-left: 60px; */
    /* margin-right: 210px; */
    /* margin-bottom: 15px; */
    transition: 1s ease;
  }
  .result-document:hover
  {
    -webkit-transform: scale(1.1);
    -ms-transform: scale(1.1);
    transform: scale(1.1);
    transition: 1s ease;
  }
  .result-document div {
    padding: 5px;
  }
  .result-title {
    width:60%;
  }
  .result-body {
    background: #ddd;
  }
  .result-document:nth-child(2n+1) {
    background-color: #FFFFFD;
  }
  #facets {
    margin: 5px;
    margin-top: 0px;
    padding: 5px;
    top: -20px;
    position: relative;
    float: right;
    width: 25%;
  }
  .facet-field {
    font-weight: bold;
  }
  #facets ul {
    list-style: none;
    margin: 0;
    margin-bottom: 5px;
    margin-top: 5px;
    padding-left: 10px;
  }
  #facets ul li {
    color: #999;
    padding: 2px;
  }
  div.facet_field {
    clear: left;
  }
  ul.tx3-tag-cloud { }
  ul.tx3-tag-cloud li {
    display: block;
    float: left;
    list-style: none;
    margin-right: 4px;
  }
  ul.tx3-tag-cloud li a {
    display: block;
    text-decoration: none;
    color: #c9c9c9;
    padding: 3px 10px;
  }
  ul.tx3-tag-cloud li a:hover {
    color: #000000;
    -webkit-transition: color 250ms linear;
    -moz-transition: color 250ms linear;
    -o-transition: color 250ms linear;
    -ms-transition: color 250ms linear;
    transition: color 250ms linear;
  }
  .dropit {
    list-style: none;
    padding: 0;
    margin: 0;
  }
  .dropit .dropit-trigger { position: relative; }
  .dropit .dropit-submenu {
    position: absolute;
    top: 100%;
    left: 0; /* dropdown left or right */
    z-index: 1000;
    display: none;
    min-width: 150px;
    list-style: none;
    padding: 0;
    margin: 0;
  }
  .dropit .dropit-open .dropit-submenu { display: block; }
  /* autocomplete css (an HTML comment here became part of the next selector
     and invalidated the .ac_results rule) */
  .ac_results {
    padding: 0px;
    border: 1px solid black;
    background-color: white;
    overflow: hidden;
    z-index: 99999;
  }
  .ac_results ul {
    width: 100%;
    list-style-position: outside;
    list-style: none;
    padding: 0;
    margin: 0;
  }
  .ac_results li {
    margin: 0px;
    padding: 2px 5px;
    cursor: default;
    display: block;
    font: menu;
    font-size: 12px;
    line-height: 16px;
    overflow: hidden;
  }
  .ac_loading {
    /* background: white url('˜indicator.gif') right center no-repeat; */
  }
  .ac_odd {
    background-color: #eee;
  }
  .ac_over {
    background-color: #0A246A;
    color: white;
  }
</style>
@@ -0,0 +1,77 @@ | |||
## Renders a single search hit: file-type icon, title, id, highlighted content
## snippets, and (in debug mode) the score explanation and all stored fields.
#set($docId = $doc.getFirstValue($request.schema.uniqueKeyField.name))
## Load Mime-Type List and Mapping
#parse('mime_type_lists.vm')
## Title: the "title" field when present, otherwise the part of the id after the last slash.
#if($doc.getFieldValue('title'))
  #set($title = $esc.html($doc.getFirstValue('title')))
#else
  #set($idTail = "$doc.getFirstValue('id').substring($math.add(1,$doc.getFirstValue('id').lastIndexOf('/')))")
  ## Escaped like the regular title, because it is printed as HTML below.
  #set($title = $esc.html($idTail))
#end
## Date
#if($doc.getFieldValue('attr_meta_creation_date'))
  #set($date = $esc.html($doc.getFirstValue('attr_meta_creation_date')))
#else
  #set($date = "No date found")
#end
## URL
#if($doc.getFieldValue('url'))
  #set($url = $doc.getFieldValue('url'))
#elseif($doc.getFieldValue('resourcename'))
  #set($url = "file:///$doc.getFirstValue('resourcename')")
#else
  #set($url = "$doc.getFieldValue('id')")
#end
## Sort out Mime-Type
#set($ct = $doc.getFirstValue('content_type').split(";").get(0))
#set($filename = $doc.getFirstValue('resourcename'))
#set($filetype = false)
#set($filetype = $mimeExtensionsMap.get($ct))
## Fall back to the file extension, but only when the name actually contains a
## dot: substring() with the -1 returned by lastIndexOf would throw and abort rendering.
#if(!$filetype && $filename && $filename.lastIndexOf(".") >= 0)
  #set($filetype = $filename.substring($filename.lastIndexOf(".")).substring(1))
#end
#if(!$filetype)
  #set($filetype = "file")
#end
## Only types with a known icon are used; everything else gets the generic one.
#if(!$supportedMimeTypes.contains($filetype))
  #set($filetype = "file")
#end
<div class="result-document">
  <span class="result-title">
    <img src="#{url_root}/img/filetypes/${filetype}.png" align="center">
    <b>$title</b>
  </span>
  <div>
    id: $esc.html($docId) <br/>
  </div>
  ## Highlight snippets are escaped first; only the HL_START/HL_END markers become markup.
  #set($pad = "")
  #foreach($v in $response.response.highlighting.get($docId).get("content"))
    $pad$esc.html($v).replace("HL_START","<em>").replace("HL_END","</em>")
    #set($pad = " ... ")
  #end
</div>
<a href="#" class="debug" onclick='jQuery(this).next().toggle(); return false;'>toggle explain</a>
<pre style="display: none;">
$esc.html($response.getExplainMap().get($doc.getFirstValue('id')))
</pre>
<a href="#" class="debug" onclick='jQuery(this).next().toggle(); return false;'>show all fields</a>
<pre style="display:none;">
#foreach($fieldname in $doc.fieldNames)
<span>$fieldname :</span>
<span>#foreach($value in $doc.getFieldValues($fieldname))$esc.html($value)#end</span>
#end
</pre>
@@ -0,0 +1,97 @@ | |||
/* | |||
* Dropit v1.1.0 | |||
* http://dev7studios.com/dropit | |||
* | |||
* Copyright 2012, Dev7studios | |||
* Free to use and abuse under the MIT license. | |||
* http://www.opensource.org/licenses/mit-license.php | |||
*/ | |||
// Dropit jQuery plugin: turns a list into a drop-down menu whose submenus are
// shown and hidden in response to the configured trigger action.
;(function($) { | |||
// Plugin entry point. Called with an options object (or nothing) it runs init;
// called with a method name it dispatches to that method.
$.fn.dropit = function(method) { | |||
var methods = { | |||
// Merges the defaults with the given options and wires up every matched element.
init : function(options) { | |||
// The merged settings are stored on the plugin function itself and read back
// below via $.fn.dropit.settings, so they are shared rather than per element.
this.dropit.settings = $.extend({}, this.dropit.defaults, options); | |||
return this.each(function() { | |||
var $el = $(this), | |||
el = this, | |||
settings = $.fn.dropit.settings; | |||
// Hide initial submenus | |||
$el.addClass('dropit') | |||
.find('>'+ settings.triggerParentEl +':has('+ settings.submenuEl +')').addClass('dropit-trigger') | |||
.find(settings.submenuEl).addClass('dropit-submenu').hide(); | |||
// Open on click | |||
// (more precisely: on the configured settings.action, which may also be a hover event)
$el.off(settings.action).on(settings.action, settings.triggerParentEl +':has('+ settings.submenuEl +') > '+ settings.triggerEl +'', function(){ | |||
// Close click menu's if clicked again | |||
if(settings.action == 'click' && $(this).parents(settings.triggerParentEl).hasClass('dropit-open')){ | |||
settings.beforeHide.call(this); | |||
$(this).parents(settings.triggerParentEl).removeClass('dropit-open').find(settings.submenuEl).hide(); | |||
settings.afterHide.call(this); | |||
return false; | |||
} | |||
// Hide open menus | |||
settings.beforeHide.call(this); | |||
$('.dropit-open').removeClass('dropit-open').find('.dropit-submenu').hide(); | |||
settings.afterHide.call(this); | |||
// Open this menu | |||
settings.beforeShow.call(this); | |||
$(this).parents(settings.triggerParentEl).addClass('dropit-open').find(settings.submenuEl).show(); | |||
settings.afterShow.call(this); | |||
return false; | |||
}); | |||
// Close if outside click | |||
// NOTE: this document-level handler is registered once per matched element.
$(document).on('click', function(){ | |||
settings.beforeHide.call(this); | |||
$('.dropit-open').removeClass('dropit-open').find('.dropit-submenu').hide(); | |||
settings.afterHide.call(this); | |||
}); | |||
// If hover | |||
// close the open submenu again as soon as the pointer leaves it
if(settings.action == 'mouseenter'){ | |||
$el.on('mouseleave', '.dropit-open', function(){ | |||
settings.beforeHide.call(this); | |||
$(this).removeClass('dropit-open').find(settings.submenuEl).hide(); | |||
settings.afterHide.call(this); | |||
}); | |||
} | |||
settings.afterLoad.call(this); | |||
}); | |||
} | |||
}; | |||
// Dispatch: a known method name calls that method with the remaining arguments,
// an object or no argument runs init, anything else is reported through $.error.
if (methods[method]) { | |||
return methods[method].apply(this, Array.prototype.slice.call(arguments, 1)); | |||
} else if (typeof method === 'object' || !method) { | |||
return methods.init.apply(this, arguments); | |||
} else { | |||
$.error( 'Method "' + method + '" does not exist in dropit plugin!'); | |||
} | |||
}; | |||
// Default settings; init merges caller options over these.
$.fn.dropit.defaults = { | |||
action: 'mouseenter', // The open action for the trigger | |||
submenuEl: 'ul', // The submenu element | |||
triggerEl: 'a', // The trigger element | |||
triggerParentEl: 'li', // The trigger parent element | |||
afterLoad: function(){}, // Triggers when plugin has loaded | |||
beforeShow: function(){}, // Triggers before submenu is shown | |||
afterShow: function(){}, // Triggers after submenu is shown | |||
beforeHide: function(){}, // Triggers before submenu is hidden | |||
afterHide: function(){} // Triggers after submenu is hidden | |||
}; | |||
// Replaced with the merged settings on every init call.
$.fn.dropit.settings = {}; | |||
})(jQuery); |
@@ -0,0 +1,763 @@ | |||
/* | |||
* Autocomplete - jQuery plugin 1.1pre | |||
* | |||
* Copyright (c) 2007 Dylan Verheul, Dan G. Switzer, Anjesh Tuladhar, Jörn Zaefferer | |||
* | |||
* Dual licensed under the MIT and GPL licenses: | |||
* http://www.opensource.org/licenses/mit-license.php | |||
* http://www.gnu.org/licenses/gpl.html | |||
* | |||
* Revision: Id: jquery.autocomplete.js 5785 2008-07-12 10:37:33Z joern.zaefferer $ | |||
* | |||
*/ | |||
;(function($) { | |||
// Public jQuery API of the autocomplete plugin.
$.fn.extend({
    // Attaches an Autocompleter to every matched element. `urlOrData` is either
    // the URL to query (string) or a local data set (anything else).
    autocomplete: function(urlOrData, options) {
        var sourceIsUrl = (typeof urlOrData == "string");
        // Settings derived from the call itself; explicit options still win
        // because they are merged last.
        var derived = {
            url: sourceIsUrl ? urlOrData : null,
            data: sourceIsUrl ? null : urlOrData,
            delay: sourceIsUrl ? $.Autocompleter.defaults.delay : 10,
            max: options && !options.scroll ? 10 : 150
        };
        options = $.extend({}, $.Autocompleter.defaults, derived, options);

        // if highlight is set to false, replace it with a do-nothing function
        if (!options.highlight) {
            options.highlight = function(value) { return value; };
        }

        // if the formatMatch option is not specified, then use formatItem for backwards compatibility
        if (!options.formatMatch) {
            options.formatMatch = options.formatItem;
        }

        return this.each(function() {
            new $.Autocompleter(this, options);
        });
    },

    // Registers a handler for the "result" event.
    result: function(handler) {
        return this.bind("result", handler);
    },

    // Triggers the "search" event, handing `handler` to its listener.
    search: function(handler) {
        return this.trigger("search", [handler]);
    },

    // Triggers the "flushCache" event.
    flushCache: function() {
        return this.trigger("flushCache");
    },

    // Triggers the "setOptions" event with the new options.
    setOptions: function(options){
        return this.trigger("setOptions", [options]);
    },

    // Triggers the "unautocomplete" event.
    unautocomplete: function() {
        return this.trigger("unautocomplete");
    }
});
$.Autocompleter = function(input, options) { | |||
var KEY = { | |||
UP: 38, | |||
DOWN: 40, | |||
DEL: 46, | |||
TAB: 9, | |||
RETURN: 13, | |||
ESC: 27, | |||
COMMA: 188, | |||
PAGEUP: 33, | |||
PAGEDOWN: 34, | |||
BACKSPACE: 8 | |||
}; | |||
// Create $ object for input element | |||
var $input = $(input).attr("autocomplete", "off").addClass(options.inputClass); | |||
var timeout; | |||
var previousValue = ""; | |||
var cache = $.Autocompleter.Cache(options); | |||
var hasFocus = 0; | |||
var lastKeyPressCode; | |||
var config = { | |||
mouseDownOnSelect: false | |||
}; | |||
var select = $.Autocompleter.Select(options, input, selectCurrent, config); | |||
var blockSubmit; | |||
// prevent form submit in opera when selecting with return key | |||
$.browser.opera && $(input.form).bind("submit.autocomplete", function() { | |||
if (blockSubmit) { | |||
blockSubmit = false; | |||
return false; | |||
} | |||
}); | |||
// only opera doesn't trigger keydown multiple times while pressed, others don't work with keypress at all | |||
$input.bind(($.browser.opera ? "keypress" : "keydown") + ".autocomplete", function(event) { | |||
// track last key pressed | |||
lastKeyPressCode = event.keyCode; | |||
switch(event.keyCode) { | |||
case KEY.UP: | |||
event.preventDefault(); | |||
if ( select.visible() ) { | |||
select.prev(); | |||
} else { | |||
onChange(0, true); | |||
} | |||
break; | |||
case KEY.DOWN: | |||
event.preventDefault(); | |||
if ( select.visible() ) { | |||
select.next(); | |||
} else { | |||
onChange(0, true); | |||
} | |||
break; | |||
case KEY.PAGEUP: | |||
event.preventDefault(); | |||
if ( select.visible() ) { | |||
select.pageUp(); | |||
} else { | |||
onChange(0, true); | |||
} | |||
break; | |||
case KEY.PAGEDOWN: | |||
event.preventDefault(); | |||
if ( select.visible() ) { | |||
select.pageDown(); | |||
} else { | |||
onChange(0, true); | |||
} | |||
break; | |||
// matches also semicolon | |||
case options.multiple && $.trim(options.multipleSeparator) == "," && KEY.COMMA: | |||
case KEY.TAB: | |||
case KEY.RETURN: | |||
if( selectCurrent() ) { | |||
// stop default to prevent a form submit, Opera needs special handling | |||
event.preventDefault(); | |||
blockSubmit = true; | |||
return false; | |||
} | |||
break; | |||
case KEY.ESC: | |||
select.hide(); | |||
break; | |||
default: | |||
clearTimeout(timeout); | |||
timeout = setTimeout(onChange, options.delay); | |||
break; | |||
} | |||
}).focus(function(){ | |||
// track whether the field has focus, we shouldn't process any | |||
// results if the field no longer has focus | |||
hasFocus++; | |||
}).blur(function() { | |||
hasFocus = 0; | |||
if (!config.mouseDownOnSelect) { | |||
hideResults(); | |||
} | |||
}).click(function() { | |||
// show select when clicking in a focused field | |||
if ( hasFocus++ > 1 && !select.visible() ) { | |||
onChange(0, true); | |||
} | |||
}).bind("search", function() { | |||
// TODO why not just specifying both arguments? | |||
var fn = (arguments.length > 1) ? arguments[1] : null; | |||
function findValueCallback(q, data) { | |||
var result; | |||
if( data && data.length ) { | |||
for (var i=0; i < data.length; i++) { | |||
if( data[i].result.toLowerCase() == q.toLowerCase() ) { | |||
result = data[i]; | |||
break; | |||
} | |||
} | |||
} | |||
if( typeof fn == "function" ) fn(result); | |||
else $input.trigger("result", result && [result.data, result.value]); | |||
} | |||
$.each(trimWords($input.val()), function(i, value) { | |||
request(value, findValueCallback, findValueCallback); | |||
}); | |||
}).bind("flushCache", function() { | |||
cache.flush(); | |||
}).bind("setOptions", function() { | |||
$.extend(options, arguments[1]); | |||
// if we've updated the data, repopulate | |||
if ( "data" in arguments[1] ) | |||
cache.populate(); | |||
}).bind("unautocomplete", function() { | |||
select.unbind(); | |||
$input.unbind(); | |||
$(input.form).unbind(".autocomplete"); | |||
}); | |||
function selectCurrent() { | |||
var selected = select.selected(); | |||
if( !selected ) | |||
return false; | |||
var v = selected.result; | |||
previousValue = v; | |||
if ( options.multiple ) { | |||
var words = trimWords($input.val()); | |||
if ( words.length > 1 ) { | |||
v = words.slice(0, words.length - 1).join( options.multipleSeparator ) + options.multipleSeparator + v; | |||
} | |||
v += options.multipleSeparator; | |||
} | |||
$input.val(v); | |||
hideResultsNow(); | |||
$input.trigger("result", [selected.data, selected.value]); | |||
return true; | |||
} | |||
function onChange(crap, skipPrevCheck) { | |||
if( lastKeyPressCode == KEY.DEL ) { | |||
select.hide(); | |||
return; | |||
} | |||
var currentValue = $input.val(); | |||
if ( !skipPrevCheck && currentValue == previousValue ) | |||
return; | |||
previousValue = currentValue; | |||
currentValue = lastWord(currentValue); | |||
if ( currentValue.length >= options.minChars) { | |||
$input.addClass(options.loadingClass); | |||
if (!options.matchCase) | |||
currentValue = currentValue.toLowerCase(); | |||
request(currentValue, receiveData, hideResultsNow); | |||
} else { | |||
stopLoading(); | |||
select.hide(); | |||
} | |||
}; | |||
function trimWords(value) { | |||
if ( !value ) { | |||
return [""]; | |||
} | |||
var words = value.split( options.multipleSeparator ); | |||
var result = []; | |||
$.each(words, function(i, value) { | |||
if ( $.trim(value) ) | |||
result[i] = $.trim(value); | |||
}); | |||
return result; | |||
} | |||
function lastWord(value) { | |||
if ( !options.multiple ) | |||
return value; | |||
var words = trimWords(value); | |||
return words[words.length - 1]; | |||
} | |||
// fills in the input box w/the first match (assumed to be the best match) | |||
// q: the term entered | |||
// sValue: the first matching result | |||
function autoFill(q, sValue){ | |||
// autofill in the complete box w/the first match as long as the user hasn't entered in more data | |||
// if the last user key pressed was backspace, don't autofill | |||
if( options.autoFill && (lastWord($input.val()).toLowerCase() == q.toLowerCase()) && lastKeyPressCode != KEY.BACKSPACE ) { | |||
// fill in the value (keep the case the user has typed) | |||
$input.val($input.val() + sValue.substring(lastWord(previousValue).length)); | |||
// select the portion of the value not typed by the user (so the next character will erase) | |||
$.Autocompleter.Selection(input, previousValue.length, previousValue.length + sValue.length); | |||
} | |||
}; | |||
function hideResults() { | |||
clearTimeout(timeout); | |||
timeout = setTimeout(hideResultsNow, 200); | |||
}; | |||
function hideResultsNow() { | |||
var wasVisible = select.visible(); | |||
select.hide(); | |||
clearTimeout(timeout); | |||
stopLoading(); | |||
if (options.mustMatch) { | |||
// call search and run callback | |||
$input.search( | |||
function (result){ | |||
// if no value found, clear the input box | |||
if( !result ) { | |||
if (options.multiple) { | |||
var words = trimWords($input.val()).slice(0, -1); | |||
$input.val( words.join(options.multipleSeparator) + (words.length ? options.multipleSeparator : "") ); | |||
} | |||
else | |||
$input.val( "" ); | |||
} | |||
} | |||
); | |||
} | |||
if (wasVisible) | |||
// position cursor at end of input field | |||
$.Autocompleter.Selection(input, input.value.length, input.value.length); | |||
}; | |||
function receiveData(q, data) { | |||
if ( data && data.length && hasFocus ) { | |||
stopLoading(); | |||
select.display(data, q); | |||
autoFill(q, data[0].value); | |||
select.show(); | |||
} else { | |||
hideResultsNow(); | |||
} | |||
}; | |||
function request(term, success, failure) { | |||
if (!options.matchCase) | |||
term = term.toLowerCase(); | |||
var data = cache.load(term); | |||
data = null; // Avoid buggy cache and go to Solr every time | |||
// recieve the cached data | |||
if (data && data.length) { | |||
success(term, data); | |||
// if an AJAX url has been supplied, try loading the data now | |||
} else if( (typeof options.url == "string") && (options.url.length > 0) ){ | |||
var extraParams = { | |||
timestamp: +new Date() | |||
}; | |||
$.each(options.extraParams, function(key, param) { | |||
extraParams[key] = typeof param == "function" ? param() : param; | |||
}); | |||
$.ajax({ | |||
// try to leverage ajaxQueue plugin to abort previous requests | |||
mode: "abort", | |||
// limit abortion to this input | |||
port: "autocomplete" + input.name, | |||
dataType: options.dataType, | |||
url: options.url, | |||
data: $.extend({ | |||
q: lastWord(term), | |||
limit: options.max | |||
}, extraParams), | |||
success: function(data) { | |||
var parsed = options.parse && options.parse(data) || parse(data); | |||
cache.add(term, parsed); | |||
success(term, parsed); | |||
} | |||
}); | |||
} else { | |||
// if we have a failure, we need to empty the list -- this prevents the the [TAB] key from selecting the last successful match | |||
select.emptyList(); | |||
failure(term); | |||
} | |||
}; | |||
function parse(data) { | |||
var parsed = []; | |||
var rows = data.split("\n"); | |||
for (var i=0; i < rows.length; i++) { | |||
var row = $.trim(rows[i]); | |||
if (row) { | |||
row = row.split("|"); | |||
parsed[parsed.length] = { | |||
data: row, | |||
value: row[0], | |||
result: options.formatResult && options.formatResult(row, row[0]) || row[0] | |||
}; | |||
} | |||
} | |||
return parsed; | |||
}; | |||
/** Remove the "loading" CSS class (options.loadingClass) from the input. */
function stopLoading() {
    var loadingClass = options.loadingClass;
    $input.removeClass(loadingClass);
}
}; | |||
/**
 * Default option values for the autocompleter. Options whose use is not
 * visible in this part of the file are left uncommented on purpose.
 */
$.Autocompleter.defaults = {
    inputClass: "ac_input",
    // CSS class put on the absolutely positioned results container.
    resultsClass: "ac_results",
    // CSS class removed from the input once loading stops.
    loadingClass: "ac_loading",
    // Lower bound for the prefix length tried during cache subset matching.
    minChars: 1,
    delay: 400,
    // When false, terms and candidate values are lower-cased before comparing.
    matchCase: false,
    // Allow the cache to answer a term from a shorter cached prefix.
    matchSubset: true,
    // false: match at the start only; truthy: anywhere; "word": at word boundaries.
    matchContains: false,
    // Number of cached terms tolerated before the cache is flushed; 0 disables cache reads.
    cacheLength: 10,
    // Upper limit on listed items; also sent to the server as `limit`.
    max: 100,
    mustMatch: false,
    // Extra query parameters; function values are called at request time.
    extraParams: {},
    // Pre-select the first list entry after the list is filled.
    selectFirst: false,
    // Produces the markup shown for a row; by default its first field.
    formatItem: function(row) {
        return row[0];
    },
    formatMatch: null,
    autoFill: false,
    // Width of the results box; 0 means "use the width of the input".
    width: 0,
    multiple: false,
    multipleSeparator: ", ",
    // Wraps occurrences of the typed term in <strong>; the lookaheads reject
    // candidates that sit inside an HTML tag or an entity.
    highlight: function(value, term) {
        var escapedTerm = term.replace(/([\^\$\(\)\[\]\{\}\*\.\+\?\|\\])/gi, "\\$1");
        var matcher = new RegExp("(?![^&;]+;)(?!<[^<>]*)(" + escapedTerm + ")(?![^<>]*>)(?![^&;]+;)", "gi");
        return value.replace(matcher, "<strong>$1</strong>");
    },
    // Make the list scrollable, capped at scrollHeight pixels.
    scroll: true,
    scrollHeight: 180
};
/**
 * In-memory suggestion cache keyed by search term.
 *
 * options - the autocompleter options; this object reads matchCase,
 *           matchContains, matchSubset, minChars, cacheLength, max, url,
 *           data, formatMatch and formatResult, and adjusts cacheLength
 *           while loading local data.
 *
 * Returns an object exposing flush(), add(q, value), populate() and load(q).
 */
$.Autocompleter.Cache = function(options) {
    var data = {};
    var length = 0;
    var hasOwn = Object.prototype.hasOwnProperty;

    // True only for terms that were really stored, never for members
    // inherited from Object.prototype such as "constructor" or "toString".
    function has(key) {
        return hasOwn.call(data, key);
    }

    // Escape regular-expression metacharacters so that typed text is always
    // matched literally (same character set as the default highlighter).
    function escapeRegExp(text) {
        return text.replace(/([\^\$\(\)\[\]\{\}\*\.\+\?\|\\])/g, "\\$1");
    }

    // Decide whether candidate value `s` matches the typed text `sub`
    // according to options.matchCase / options.matchContains.
    function matchSubset(s, sub) {
        if (!options.matchCase)
            s = s.toLowerCase();
        var i = s.indexOf(sub);
        if (options.matchContains == "word") {
            // The typed text is escaped first: unescaped input such as "("
            // or "c++" would otherwise throw while compiling the pattern.
            i = s.toLowerCase().search(new RegExp("\\b" + escapeRegExp(sub.toLowerCase())));
        }
        if (i == -1) return false;
        return i == 0 || options.matchContains;
    }

    // Store `value` under term `q`, flushing everything first once the
    // cache has outgrown options.cacheLength.
    function add(q, value) {
        if (length > options.cacheLength) {
            flush();
        }
        if (!has(q)) {
            length++;
        }
        data[q] = value;
    }

    // Index the local options.data array by the first character of each
    // formatted value so later lookups can start from a small bucket.
    function populate() {
        if (!options.data) return false;
        // track the matches
        var stMatchSets = {},
            nullData = 0;

        // no url was specified, we need to adjust the cache length to make sure it fits the local data store
        if (!options.url) options.cacheLength = 1;

        // track all options for minChars = 0
        stMatchSets[""] = [];

        // loop through the array and create a lookup structure
        for (var i = 0, ol = options.data.length; i < ol; i++) {
            var rawValue = options.data[i];
            // if rawValue is a string, make an array otherwise just reference the array
            rawValue = (typeof rawValue == "string") ? [rawValue] : rawValue;

            var value = options.formatMatch(rawValue, i + 1, options.data.length);
            if (value === false)
                continue;

            var firstChar = value.charAt(0).toLowerCase();
            // if no lookup array for this character exists, create it now
            if (!stMatchSets[firstChar])
                stMatchSets[firstChar] = [];

            var row = {
                value: value,
                data: rawValue,
                result: options.formatResult && options.formatResult(rawValue) || value
            };

            // push the current match into the set list
            stMatchSets[firstChar].push(row);

            // keep track of minChars zero items
            if (nullData++ < options.max) {
                stMatchSets[""].push(row);
            }
        }

        // add the data items to the cache
        $.each(stMatchSets, function(i, value) {
            // increase the cache size so local buckets are never flushed
            options.cacheLength++;
            // add to the cache
            add(i, value);
        });
    }

    // populate any existing data
    setTimeout(populate, 25);

    // Drop every cached entry.
    function flush() {
        data = {};
        length = 0;
    }

    return {
        flush: flush,
        add: add,
        populate: populate,
        // Return the cached rows matching `q`, or null when the cache has
        // nothing usable for that term.
        load: function(q) {
            if (!options.cacheLength || !length)
                return null;
            /*
             * if dealing w/local data and matchContains then we must make sure
             * to loop through all the data collections looking for matches
             */
            if (!options.url && options.matchContains) {
                // track all matches
                var csub = [];
                // loop through all the data grids for matches
                for (var k in data) {
                    // don't search through the stMatchSets[""] (minChars: 0) cache
                    // this prevents duplicates
                    if (has(k) && k.length > 0) {
                        var c = data[k];
                        $.each(c, function(i, x) {
                            // if we've got a match, add it to the array
                            if (matchSubset(x.value, q)) {
                                csub.push(x);
                            }
                        });
                    }
                }
                return csub;
            } else if (has(q) && data[q]) {
                // if the exact item exists, use it
                return data[q];
            } else if (options.matchSubset) {
                // otherwise try ever shorter prefixes of q and filter their rows
                for (var i = q.length - 1; i >= options.minChars; i--) {
                    var prefix = q.substr(0, i);
                    var bucket = has(prefix) ? data[prefix] : null;
                    if (bucket) {
                        var matches = [];
                        $.each(bucket, function(i, x) {
                            if (matchSubset(x.value, q)) {
                                matches[matches.length] = x;
                            }
                        });
                        return matches;
                    }
                }
            }
            return null;
        }
    };
};
/**
 * Controller for the drop-down list of suggestions.
 *
 * options - autocompleter options (resultsClass, width, scroll,
 *           scrollHeight, max, selectFirst, formatItem, highlight)
 * input   - the DOM input element the list is attached to
 * select  - callback invoked when an entry is clicked
 * config  - shared state object; its mouseDownOnSelect flag is toggled here
 *
 * Returns an object with display, next, prev, pageUp, pageDown, hide,
 * visible, current, show, selected, emptyList and unbind.
 */
$.Autocompleter.Select = function (options, input, select, config) {
    var CLASSES = {
        ACTIVE: "ac_over"
    };

    var listItems,
        active = -1,
        data,
        term = "",
        needsInit = true,
        element,
        list;

    // Lazily create the results container and wire up its mouse handlers.
    function init() {
        if (!needsInit)
            return;
        element = $("<div/>")
            .hide()
            .addClass(options.resultsClass)
            .css("position", "absolute")
            .appendTo(document.body);

        list = $("<ul/>").appendTo(element).mouseover(function(event) {
            if (target(event).nodeName && target(event).nodeName.toUpperCase() == 'LI') {
                active = $("li", list).removeClass(CLASSES.ACTIVE).index(target(event));
                $(target(event)).addClass(CLASSES.ACTIVE);
            }
        }).click(function(event) {
            $(target(event)).addClass(CLASSES.ACTIVE);
            select();
            // TODO provide option to avoid setting focus again after selection? useful for cleanup-on-focus
            input.focus();
            return false;
        }).mousedown(function() {
            config.mouseDownOnSelect = true;
        }).mouseup(function() {
            config.mouseDownOnSelect = false;
        });

        if (options.width > 0)
            element.css("width", options.width);

        needsInit = false;
    }

    // Walk up from the event target to the enclosing <li>, if any.
    function target(event) {
        var element = event.target;
        while (element && element.tagName != "LI")
            element = element.parentNode;
        // more fun with IE, sometimes event.target is empty, just ignore it then
        if (!element)
            return [];
        return element;
    }

    // Move the highlighted entry by `step` positions and keep it in view.
    function moveSelect(step) {
        // Nothing to highlight (list never filled or currently empty);
        // without this guard activeItem[0] below would be undefined.
        if (!listItems || !listItems.length)
            return;
        listItems.slice(active, active + 1).removeClass(CLASSES.ACTIVE);
        movePosition(step);
        var activeItem = listItems.slice(active, active + 1).addClass(CLASSES.ACTIVE);
        if (options.scroll) {
            var offset = 0;
            listItems.slice(0, active).each(function() {
                offset += this.offsetHeight;
            });
            if ((offset + activeItem[0].offsetHeight - list.scrollTop()) > list[0].clientHeight) {
                list.scrollTop(offset + activeItem[0].offsetHeight - list.innerHeight());
            } else if (offset < list.scrollTop()) {
                list.scrollTop(offset);
            }
        }
    }

    // Advance `active` by `step`, wrapping around at both ends of the list.
    function movePosition(step) {
        active += step;
        if (active < 0) {
            active = listItems.length - 1;
        } else if (active >= listItems.length) {
            active = 0;
        }
    }

    // Cap the number of rendered entries at options.max (when set).
    function limitNumberOfItems(available) {
        return options.max && options.max < available
            ? options.max
            : available;
    }

    // Rebuild the <li> entries from the current data and term.
    function fillList() {
        list.empty();
        var max = limitNumberOfItems(data.length);
        for (var i = 0; i < max; i++) {
            if (!data[i])
                continue;
            var formatted = options.formatItem(data[i].data, i + 1, max, data[i].value, term);
            if (formatted === false)
                continue;
            var li = $("<li/>").html(options.highlight(formatted, term)).addClass(i % 2 == 0 ? "ac_even" : "ac_odd").appendTo(list)[0];
            $.data(li, "ac_data", data[i]);
        }
        listItems = list.find("li");
        if (options.selectFirst) {
            listItems.slice(0, 1).addClass(CLASSES.ACTIVE);
            active = 0;
        }
        // apply bgiframe if available
        if ($.fn.bgiframe)
            list.bgiframe();
    }

    return {
        // Store the rows `d` for query `q` and render them.
        display: function(d, q) {
            init();
            data = d;
            term = q;
            fillList();
        },
        next: function() {
            moveSelect(1);
        },
        prev: function() {
            moveSelect(-1);
        },
        // Jump up by 8 entries, stopping at the first entry on the way.
        pageUp: function() {
            if (active != 0 && active - 8 < 0) {
                moveSelect(-active);
            } else {
                moveSelect(-8);
            }
        },
        // Jump down by 8 entries, stopping at the last entry on the way.
        pageDown: function() {
            if (active != listItems.length - 1 && active + 8 > listItems.length) {
                moveSelect(listItems.length - 1 - active);
            } else {
                moveSelect(8);
            }
        },
        hide: function() {
            element && element.hide();
            listItems && listItems.removeClass(CLASSES.ACTIVE);
            active = -1;
        },
        visible: function() {
            return element && element.is(":visible");
        },
        // The highlighted <li> (or the first one when selectFirst is set),
        // but only while the list is visible.
        current: function() {
            return this.visible() && (listItems.filter("." + CLASSES.ACTIVE)[0] || options.selectFirst && listItems[0]);
        },
        // Position the list directly below the input and reveal it.
        show: function() {
            var offset = $(input).offset();
            element.css({
                width: typeof options.width == "string" || options.width > 0 ? options.width : $(input).width(),
                top: offset.top + input.offsetHeight,
                left: offset.left
            }).show();
            if (options.scroll) {
                list.scrollTop(0);
                list.css({
                    maxHeight: options.scrollHeight,
                    overflow: 'auto'
                });

                // Legacy IE without max-height support. $.browser no longer
                // exists in jQuery 1.9+, so test for it before reading .msie.
                if ($.browser && $.browser.msie && typeof document.body.style.maxHeight === "undefined") {
                    var listHeight = 0;
                    listItems.each(function() {
                        listHeight += this.offsetHeight;
                    });
                    var scrollbarsVisible = listHeight > options.scrollHeight;
                    list.css('height', scrollbarsVisible ? options.scrollHeight : listHeight);
                    if (!scrollbarsVisible) {
                        // IE doesn't recalculate width when scrollbar disappears
                        listItems.width(list.width() - parseInt(listItems.css("padding-left"), 10) - parseInt(listItems.css("padding-right"), 10));
                    }
                }
            }
        },
        // Return the data stored on the highlighted entry and clear the highlight.
        selected: function() {
            var selected = listItems && listItems.filter("." + CLASSES.ACTIVE).removeClass(CLASSES.ACTIVE);
            return selected && selected.length && $.data(selected[0], "ac_data");
        },
        emptyList: function() {
            list && list.empty();
        },
        unbind: function() {
            element && element.remove();
        }
    };
};
/**
 * Select the characters between `start` and `end` in a text field and
 * give the field focus.
 *
 * field - DOM text input
 * start - index of the first selected character
 * end   - index passed on as the end of the selection
 *
 * Three mechanisms are tried in order: createTextRange, then
 * setSelectionRange, then direct assignment of selectionStart/selectionEnd.
 */
$.Autocompleter.Selection = function(field, start, end) {
    if (field.createTextRange) {
        var selRange = field.createTextRange();
        selRange.collapse(true);
        selRange.moveStart("character", start);
        selRange.moveEnd("character", end);
        selRange.select();
    } else if (field.setSelectionRange) {
        field.setSelectionRange(start, end);
    } else if (typeof field.selectionStart == "number") {
        // Check the property's type, not its truthiness: a caret at
        // position 0 is a valid state and must not skip this branch.
        field.selectionStart = start;
        field.selectionEnd = end;
    }
    field.focus();
};
})(jQuery); |
@@ -0,0 +1,70 @@ | |||
/* | |||
* ---------------------------------------------------------------------------- | |||
* "THE BEER-WARE LICENSE" (Revision 42): | |||
* Tuxes3 wrote this file. As long as you retain this notice you | |||
* can do whatever you want with this stuff. If we meet some day, and you think | |||
* this stuff is worth it, you can buy me a beer in return Tuxes3 | |||
* ---------------------------------------------------------------------------- | |||
*/ | |||
(function($)
{
    // Settings of the most recent tx3TagCloud() call.
    var settings;

    /**
     * jQuery plugin: scales the font size of every <li> inside the matched
     * element(s) according to its "data-weight" attribute.
     *
     * options.multiplier - em added to the base size of 1em for the
     *                      heaviest entry (default 1)
     *
     * Returns the jQuery object it was called on, so calls can be chained.
     */
    $.fn.tx3TagCloud = function(options)
    {
        //
        // DEFAULT SETTINGS
        //
        settings = $.extend({
            multiplier : 1
        }, options);
        main(this);
        return this;
    };

    function main(element)
    {
        // adding style attr
        element.addClass("tx3-tag-cloud");
        addListElementFontSize(element);
    }

    /**
     * Calculates the font size of each li element
     * according to its data-weight attribute.
     *
     * The lightest entry gets 1em, the heaviest 1em + multiplier, and
     * everything else is interpolated linearly in between. Entries without
     * a numeric weight are reported and left unstyled.
     */
    function addListElementFontSize(element)
    {
        var items = element.find("li");
        var highest = null;
        var lowest = null;

        // First pass: find the weight range.
        items.each(function(){
            var weight = getDataWeight(this);
            // parseInt yields NaN (never undefined) for a missing or
            // non-numeric attribute, so NaN is what has to be tested.
            if (isNaN(weight))
            {
                logWarning("No \"data-weight\" attribut defined on <li> element");
                return;
            }
            highest = (highest === null || weight > highest) ? weight : highest;
            lowest = (lowest === null || weight < lowest) ? weight : lowest;
        });

        var range = highest - lowest;

        // Second pass: apply the interpolated size.
        items.each(function(){
            var weight = getDataWeight(this);
            if (isNaN(weight))
            {
                return;
            }
            // When all weights are equal the range is 0; use the base size
            // instead of dividing by zero.
            var percent = range === 0 ? 0 : (weight - lowest) / range;
            $(this).css('font-size', (1 + (percent * settings['multiplier'])) + "em");
        });
    }

    // Reads the data-weight attribute as a base-10 integer (NaN when absent).
    function getDataWeight(element)
    {
        return parseInt($(element).attr("data-weight"), 10);
    }

    // Writes a timestamped warning to the console, when one exists.
    function logWarning(message)
    {
        if (typeof console !== "undefined" && console.log)
        {
            console.log("[WARNING] " + Date.now() + " : " + message);
        }
    }
}(jQuery));
@@ -0,0 +1,42 @@ | |||
<html> | |||
<head> | |||
#parse("head.vm") | |||
</head> | |||
<body> | |||
<div id="header"> | |||
<a href="#url_for_home"><img src="#{url_root}/img/solr.svg" id="logo" title="Solr"/></a> $resource.powered_file_search | |||
</div> | |||
<div id="header2" onclick="javascript:locale_select()"> | |||
<ul class="menu"> | |||
<li> | |||
<a href="#"><img src="#{url_for_solr}/admin/file?file=/velocity/img/globe_256.png&contentType=image/png" id="locale_pic" title="locale_select" width="30px" height="27px"/></a> | |||
<ul> | |||
<li><a href="#url_for_locale('fr_FR')" #if("#current_locale"=="fr_FR")class="hidden"#end> | |||
<img src="#{url_for_solr}/admin/file?file=/velocity/img/france_640.png&contentType=image/png" id="french_flag" width="40px" height="40px"/>Français</a></li> | |||
<li><a href="#url_for_locale('de_DE')" #if("#current_locale"=="de_DE")class="hidden"#end> | |||
<img src="#{url_for_solr}/admin/file?file=/velocity/img/germany_640.png&contentType=image/png" id="german_flag" width="40px" height="40px"/>Deutsch</a></li> | |||
<li><a href="#url_for_locale('')" #if("#current_locale"=="")class="hidden"#end> | |||
<img src="#{url_for_solr}/admin/file?file=/velocity/img/english_640.png&contentType=image/png" id="english_flag" width="40px" height="40px"/>English</a></li> | |||
</ul> | |||
</li> | |||
</ul> | |||
</div> | |||
#if($response.response.error.code) | |||
<div class="error"> | |||
<h1>ERROR $response.response.error.code</h1> | |||
$response.response.error.msg | |||
</div> | |||
#else | |||
<div id="content"> | |||
$content | |||
</div> | |||
#end | |||
<div id="footer"> | |||
#parse("footer.vm") | |||
</div> | |||
</body> | |||
</html> |
@@ -0,0 +1,16 @@ | |||
## URL-building macros. Each definition is kept on a single line; the macros are
## expanded inside href attributes elsewhere in these templates.
## lensFilterSortOnly: emits "?", then "&" plus #fqs(...) of the response header's fq params when any exist, then #sort(...) of the request's sort params.
#macro(lensFilterSortOnly)?#if($response.responseHeader.params.getAll("fq").size() > 0)&#fqs($response.responseHeader.params.getAll("fq"))#end#sort($request.params.getParams('sort'))#end | |||
## lensNoQ: lensFilterSortOnly plus "&type=<current type>" and, when a locale is set, "&locale=<current locale>" (no #q part).
#macro(lensNoQ)#lensFilterSortOnly&type=#current_type#if("#current_locale"!="")&locale=#current_locale#end#end | |||
## lensNoType: lensFilterSortOnly plus #q and the optional locale (no type part).
#macro(lensNoType)#lensFilterSortOnly#q#if("#current_locale"!="")&locale=#current_locale#end#end | |||
## lensNoLocale: lensFilterSortOnly plus #q and "&type=<current type>" (no locale part).
#macro(lensNoLocale)#lensFilterSortOnly#q&type=#current_type#end | |||
## lens modified for example/files - to use fq from responseHeader rather than request, and #debug removed too as it is built into browse params now, also added type to lens | |||
#macro(lens)#lensNoQ#q#end | |||
## Macros defined custom for the "files" example | |||
## url_for_type: home URL + lensNoType + "&type=$type".
#macro(url_for_type $type)#url_for_home#lensNoType&type=$type#end | |||
## current_type: the "type" param of the response header, or "all" when it is not set.
#macro(current_type)#if($response.responseHeader.params.type)${response.responseHeader.params.type}#{else}all#end#end | |||
## url_for_locale: home URL + lensNoLocale + "&locale=$locale" (omitted for an empty locale) + "&start=$page.start".
#macro(url_for_locale $locale)#url_for_home#lensNoLocale#if($locale!="")&locale=$locale#end&start=$page.start#end | |||
## current_locale: the "locale" param of the response header, written with a quiet reference ($!).
#macro(current_locale)$!{response.responseHeader.params.locale}#end | |||
## Usage: #label(resource_key[, default_value]) - resource_key is used as label if no default value specified and no resource exists | |||
#macro(label $key $default)#if($resource.get($key).exists)${resource.get($key)}#else#if($default)$default#else${key}#end#end#end | |||
@@ -0,0 +1,68 @@ | |||
#** | |||
* Define some Mime-Types, short and long form | |||
*# | |||
## MimeType to extension map for detecting file type | |||
## and showing proper icon | |||
## List of types match the icons in /solr/img/filetypes | |||
## Short MimeType Names | |||
## Was called $supportedtypes | |||
#set($supportedMimeTypes = "7z;ai;aiff;asc;audio;bin;bz2;c;cfc;cfm;chm;class;conf;cpp;cs;css;csv;deb;divx;doc;dot;eml;enc;file;gif;gz;hlp;htm;html;image;iso;jar;java;jpeg;jpg;js;lua;m;mm;mov;mp3;mpg;odc;odf;odg;odi;odp;ods;odt;ogg;pdf;pgp;php;pl;png;ppt;ps;py;ram;rar;rb;rm;rpm;rtf;sig;sql;swf;sxc;sxd;sxi;sxw;tar;tex;tgz;txt;vcf;video;vsd;wav;wma;wmv;xls;xml;xpi;xvid;zip") | |||
## Long Form: map MimeType headers to our Short names | |||
## Was called $extMap | |||
#set( $mimeExtensionsMap = { | |||
"application/x-7z-compressed": "7z", | |||
"application/postscript": "ai", | |||
"application/pgp-signature": "asc", | |||
"application/octet-stream": "bin", | |||
"application/x-bzip2": "bz2", | |||
"text/x-c": "c", | |||
"application/vnd.ms-htmlhelp": "chm", | |||
"application/java-vm": "class", | |||
"text/css": "css", | |||
"text/csv": "csv", | |||
"application/x-debian-package": "deb", | |||
"application/msword": "doc", | |||
"message/rfc822": "eml", | |||
"image/gif": "gif", | |||
"application/winhlp": "hlp", | |||
"text/html": "html", | |||
"application/java-archive": "jar", | |||
"text/x-java-source": "java", | |||
"image/jpeg": "jpeg", | |||
"application/javascript": "js", | |||
"application/vnd.oasis.opendocument.chart": "odc", | |||
"application/vnd.oasis.opendocument.formula": "odf", | |||
"application/vnd.oasis.opendocument.graphics": "odg", | |||
"application/vnd.oasis.opendocument.image": "odi", | |||
"application/vnd.oasis.opendocument.presentation": "odp", | |||
"application/vnd.oasis.opendocument.spreadsheet": "ods", | |||
"application/vnd.oasis.opendocument.text": "odt", | |||
"application/pdf": "pdf", | |||
"application/pgp-encrypted": "pgp", | |||
"image/png": "png", | |||
"application/vnd.ms-powerpoint": "ppt", | |||
"audio/x-pn-realaudio": "ram", | |||
"application/x-rar-compressed": "rar", | |||
"application/vnd.rn-realmedia": "rm", | |||
"application/rtf": "rtf", | |||
"application/x-shockwave-flash": "swf", | |||
"application/vnd.sun.xml.calc": "sxc", | |||
"application/vnd.sun.xml.draw": "sxd", | |||
"application/vnd.sun.xml.impress": "sxi", | |||
"application/vnd.sun.xml.writer": "sxw", | |||
"application/x-tar": "tar", | |||
"application/x-tex": "tex", | |||
"text/plain": "txt", | |||
"text/x-vcard": "vcf", | |||
"application/vnd.visio": "vsd", | |||
"audio/x-wav": "wav", | |||
"audio/x-ms-wma": "wma", | |||
"video/x-ms-wmv": "wmv", | |||
"application/vnd.ms-excel": "xls", | |||
"application/xml": "xml", | |||
"application/x-xpinstall": "xpi", | |||
"application/zip": "zip" | |||
}) |
@@ -0,0 +1,20 @@ | |||
<div id="facets"> | |||
#parse("facets.vm") | |||
</div> | |||
<div id="results_list"> | |||
<div class="pagination"> | |||
<span class="results-found">$page.results_found</span> $resource.results_found_in.insert(${response.responseHeader.QTime}) | |||
$resource.page_of.insert($page.current_page_number,$page.page_count) | |||
</div> | |||
#parse("results_list.vm") | |||
<div class="pagination"> | |||
#link_to_previous_page | |||
<span class="results-found">$page.results_found</span> $resource.results_found. | |||
$resource.page_of.insert($page.current_page_number,$page.page_count) | |||
#link_to_next_page | |||
</div> | |||
</div> |
@@ -0,0 +1,21 @@ | |||
<ul id="tabs"> | |||
<li><a href="#url_for_type('all')" #if("#current_type"=="all")class="selected"#end>$resource.type.all ($response.response.facet_counts.facet_queries.all_types)</a></li> | |||
#foreach($type in $response.response.facet_counts.facet_fields.doc_type) | |||
#if($type.key) | |||
<li><a href="#url_for_type($type.key)" #if($type.value=="0")class="no_results"#end #if("#current_type"==$type.key)class="selected"#end> #label("type.${type.key}.label", $type.key) ($type.value)</a></li> | |||
#else | |||
#if($type.value > 0) | |||
<li><a href="#url_for_type('unknown')" #if("#current_type"=="unknown")class="selected"#end>$resource.type.unknown ($type.value)</a></li> | |||
#end | |||
#end | |||
#end | |||
</ul> | |||
<div id="results"> | |||
#foreach($doc in $response.results) | |||
#parse("hit.vm") | |||
#end | |||
</div> | |||
@@ -42,9 +42,9 @@ Help() | |||
Import() | |||
{ | |||
docker exec -it solr solr create_core -c $core | |||
docker exec -it solr solr create_core -c $core -d custom | |||
docker exec -ti --user=solr solr bash -c "cp -r /opt/solr/example/files/conf/* /var/solr/data/$core/conf/" | |||
#docker exec -ti --user=solr solr bash -c "cp -r /opt/solr/example/files/conf/* /var/solr/data/$core/conf/" | |||
docker restart solr | |||
@@ -72,7 +72,7 @@ while getopts ":hlimzaes" option; do | |||
exit;; | |||
z) # index all | |||
core="all" | |||
location="data/2018 (10381)" | |||
location="data/pop_rtfs" | |||
Import | |||
exit;; | |||
a) # index ACTIVE folder |