A search interface for the Performing Patents Otherwise publication as part of the Politics of Patents case study (part of Copim WP6): this parses data from the archive of RTF files and provides additional data from the European Patent Office OPS API. https://patents.copim.ac.uk
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

542 lines
30KB

  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <!-- Solr managed schema - automatically generated - DO NOT EDIT -->
  3. <schema name="example-data-driven-schema" version="1.6">
  <!-- Documents are uniquely identified by the "id" field, which this schema declares as a required, single-valued string field. -->
  4. <uniqueKey>id</uniqueKey>
  <!-- ancestor_path: the indexed value is kept whole as a single token (KeywordTokenizer), while the query
       string is expanded into its '/'-delimited path prefixes (PathHierarchyTokenizer). A query for a path
       therefore matches documents indexed at that exact path or at any of its ancestors. -->
  5. <fieldType name="ancestor_path" class="solr.TextField">
  6. <analyzer type="index">
  7. <tokenizer class="solr.KeywordTokenizerFactory"/>
  8. </analyzer>
  9. <analyzer type="query">
  10. <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
  11. </analyzer>
  12. </fieldType>
  <!-- Basic non-text types: raw binary, single- and multi-valued booleans (sortMissingLast puts documents
       without a value at the end when sorting), and a currency type that defaults to USD, is configured
       through currency.xml, and uses the _l_ns / _s_ns suffixes for its amount and currency-code sub-fields. -->
  13. <fieldType name="binary" class="solr.BinaryField"/>
  14. <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
  15. <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
  16. <fieldType name="currency" class="solr.CurrencyFieldType" amountLongSuffix="_l_ns" codeStrSuffix="_s_ns" defaultCurrency="USD" currencyConfig="currency.xml" />
  <!-- descendent_path: mirror image of ancestor_path. The indexed value is expanded into its '/'-delimited
       path prefixes (PathHierarchyTokenizer) and the query string is kept whole (KeywordTokenizer), so a
       query for a path matches documents indexed at that path or anywhere beneath it. -->
  17. <fieldType name="descendent_path" class="solr.TextField">
  18. <analyzer type="index">
  19. <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
  20. </analyzer>
  21. <analyzer type="query">
  22. <tokenizer class="solr.KeywordTokenizerFactory"/>
  23. </analyzer>
  24. </fieldType>
  <!-- "ignored" is neither indexed nor stored, so anything mapped to it is discarded.
       "location" keeps its coordinates in sub-fields with the _coordinate suffix.
       "location_rpt" is a geodetic (geo="true") recursive-prefix-tree spatial type measured in kilometers.
       NOTE(review): solr.LatLonType is deprecated in newer Solr releases in favour of
       solr.LatLonPointSpatialField; confirm against the deployed Solr version before upgrading. -->
  25. <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>
  26. <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
  27. <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" geo="true" maxDistErr="0.001" distErrPct="0.025" distanceUnits="kilometers"/>
  <!-- lowercase: the whole value is treated as one token (KeywordTokenizer) and lowercased, giving
       case-insensitive exact matching on the complete value. -->
  28. <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
  29. <analyzer>
  30. <tokenizer class="solr.KeywordTokenizerFactory"/>
  31. <filter class="solr.LowerCaseFilterFactory"/>
  32. </analyzer>
  33. </fieldType>
  <!-- phonetic_en: standard tokenization followed by Double Metaphone encoding. With inject="false" the
       phonetic codes replace the original tokens instead of being added next to them. The type itself is
       declared indexed but not stored. -->
  34. <fieldType name="phonetic_en" class="solr.TextField" indexed="true" stored="false">
  35. <analyzer>
  36. <tokenizer class="solr.StandardTokenizerFactory"/>
  37. <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
  38. </analyzer>
  39. </fieldType>
  <!-- Point-based date and numeric types, all with docValues enabled. Each comes as a single-valued type
       (pdate, pdouble, pfloat, pint, plong) and a multi-valued twin whose name ends in "s". -->
  40. <fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
  41. <fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
  42. <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
  43. <fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
  44. <fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
  45. <fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
  46. <fieldType name="pint" class="solr.IntPointField" docValues="true"/>
  47. <fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
  48. <fieldType name="plong" class="solr.LongPointField" docValues="true"/>
  49. <fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
  <!-- "point" is a 2-dimensional point whose components live in sub-fields with the _d suffix.
       "random" is a RandomSortField, used for sorting in random order.
       "string" / "strings" are untokenized single- and multi-valued strings; sortMissingLast puts documents
       without a value at the end when sorting. -->
  50. <fieldType name="point" class="solr.PointType" subFieldSuffix="_d" dimension="2"/>
  51. <fieldType name="random" class="solr.RandomSortField" indexed="true"/>
  52. <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
  53. <fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true"/>
  <!-- text_ar (Arabic): standard tokenization, lowercasing, stop words from lang/stopwords_ar.txt,
       Arabic normalization, then Arabic stemming. -->
  54. <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
  55. <analyzer>
  56. <tokenizer class="solr.StandardTokenizerFactory"/>
  57. <filter class="solr.LowerCaseFilterFactory"/>
  58. <filter class="solr.StopFilterFactory" words="lang/stopwords_ar.txt" ignoreCase="true"/>
  59. <filter class="solr.ArabicNormalizationFilterFactory"/>
  60. <filter class="solr.ArabicStemFilterFactory"/>
  61. </analyzer>
  62. </fieldType>
  <!-- text_bg (Bulgarian): standard tokenization, lowercasing, stop words from lang/stopwords_bg.txt,
       then Bulgarian stemming. -->
  63. <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
  64. <analyzer>
  65. <tokenizer class="solr.StandardTokenizerFactory"/>
  66. <filter class="solr.LowerCaseFilterFactory"/>
  67. <filter class="solr.StopFilterFactory" words="lang/stopwords_bg.txt" ignoreCase="true"/>
  68. <filter class="solr.BulgarianStemFilterFactory"/>
  69. </analyzer>
  70. </fieldType>
  <!-- text_ca (Catalan): standard tokenization, elision of the contractions listed in
       lang/contractions_ca.txt, lowercasing, stop words from lang/stopwords_ca.txt, then Snowball
       Catalan stemming. -->
  71. <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
  72. <analyzer>
  73. <tokenizer class="solr.StandardTokenizerFactory"/>
  74. <filter class="solr.ElisionFilterFactory" articles="lang/contractions_ca.txt" ignoreCase="true"/>
  75. <filter class="solr.LowerCaseFilterFactory"/>
  76. <filter class="solr.StopFilterFactory" words="lang/stopwords_ca.txt" ignoreCase="true"/>
  77. <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
  78. </analyzer>
  79. </fieldType>
  <!-- text_cjk (Chinese/Japanese/Korean): standard tokenization, CJK width normalization, lowercasing,
       then CJK bigram generation. No stop words or stemming are applied. -->
  80. <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
  81. <analyzer>
  82. <tokenizer class="solr.StandardTokenizerFactory"/>
  83. <filter class="solr.CJKWidthFilterFactory"/>
  84. <filter class="solr.LowerCaseFilterFactory"/>
  85. <filter class="solr.CJKBigramFilterFactory"/>
  86. </analyzer>
  87. </fieldType>
  <!-- text_cz (Czech): standard tokenization, lowercasing, stop words from lang/stopwords_cz.txt,
       then Czech stemming. -->
  88. <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
  89. <analyzer>
  90. <tokenizer class="solr.StandardTokenizerFactory"/>
  91. <filter class="solr.LowerCaseFilterFactory"/>
  92. <filter class="solr.StopFilterFactory" words="lang/stopwords_cz.txt" ignoreCase="true"/>
  93. <filter class="solr.CzechStemFilterFactory"/>
  94. </analyzer>
  95. </fieldType>
  <!-- text_da (Danish): standard tokenization, lowercasing, stop words from the snowball-format file
       lang/stopwords_da.txt, then Snowball Danish stemming. -->
  96. <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
  97. <analyzer>
  98. <tokenizer class="solr.StandardTokenizerFactory"/>
  99. <filter class="solr.LowerCaseFilterFactory"/>
  100. <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_da.txt" ignoreCase="true"/>
  101. <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
  102. </analyzer>
  103. </fieldType>
  <!-- text_de (German): standard tokenization, lowercasing, stop words from the snowball-format file
       lang/stopwords_de.txt, German normalization, then German light stemming. -->
  104. <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
  105. <analyzer>
  106. <tokenizer class="solr.StandardTokenizerFactory"/>
  107. <filter class="solr.LowerCaseFilterFactory"/>
  108. <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_de.txt" ignoreCase="true"/>
  109. <filter class="solr.GermanNormalizationFilterFactory"/>
  110. <filter class="solr.GermanLightStemFilterFactory"/>
  111. </analyzer>
  112. </fieldType>
  <!-- text_el (Greek): standard tokenization, Greek-specific lowercasing, stop words from
       lang/stopwords_el.txt (matched case-sensitively, after lowercasing), then Greek stemming. -->
  113. <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
  114. <analyzer>
  115. <tokenizer class="solr.StandardTokenizerFactory"/>
  116. <filter class="solr.GreekLowerCaseFilterFactory"/>
  117. <filter class="solr.StopFilterFactory" words="lang/stopwords_el.txt" ignoreCase="false"/>
  118. <filter class="solr.GreekStemFilterFactory"/>
  119. </analyzer>
  120. </fieldType>
  <!-- text_en (English). Index chain: standard tokenization, stop words from lang/stopwords_en.txt,
       lowercasing, possessive removal, protection of the words in protwords.txt from stemming, then
       Porter stemming. The query chain is the same except that synonyms from synonyms.txt are expanded
       immediately after tokenization. -->
  121. <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
  122. <analyzer type="index">
  123. <tokenizer class="solr.StandardTokenizerFactory"/>
  124. <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
  125. <filter class="solr.LowerCaseFilterFactory"/>
  126. <filter class="solr.EnglishPossessiveFilterFactory"/>
  127. <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  128. <filter class="solr.PorterStemFilterFactory"/>
  129. </analyzer>
  130. <analyzer type="query">
  131. <tokenizer class="solr.StandardTokenizerFactory"/>
  132. <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
  133. <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
  134. <filter class="solr.LowerCaseFilterFactory"/>
  135. <filter class="solr.EnglishPossessiveFilterFactory"/>
  136. <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  137. <filter class="solr.PorterStemFilterFactory"/>
  138. </analyzer>
  139. </fieldType>
  <!-- text_en_splitting: English text split on whitespace and then on intra-word boundaries
       (WordDelimiterGraphFilter: word parts, number parts, case changes). At index time the split parts
       are additionally catenated (catenateWords/catenateNumbers = 1) and the token graph is flattened
       as the last step; at query time nothing is catenated and synonyms from synonyms.txt are expanded
       first. Both chains remove English stop words, lowercase, honour protwords.txt and Porter-stem.
       autoGeneratePhraseQueries is enabled for this type. -->
  140. <fieldType name="text_en_splitting" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
  141. <analyzer type="index">
  142. <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  143. <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
  144. <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/>
  145. <filter class="solr.LowerCaseFilterFactory"/>
  146. <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  147. <filter class="solr.PorterStemFilterFactory"/>
  148. <filter class="solr.FlattenGraphFilterFactory" />
  149. </analyzer>
  150. <analyzer type="query">
  151. <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  152. <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
  153. <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
  154. <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/>
  155. <filter class="solr.LowerCaseFilterFactory"/>
  156. <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  157. <filter class="solr.PorterStemFilterFactory"/>
  158. </analyzer>
  159. </fieldType>
  <!-- text_en_splitting_tight: a stricter variant of text_en_splitting. The word-delimiter filter only
       catenates (generateWordParts and generateNumberParts are 0), synonyms are applied without
       expansion (expand="false") in both chains, stemming uses the English minimal stemmer, and
       duplicate tokens are removed. The index chain additionally flattens the token graph as its
       final step. -->
  160. <fieldType name="text_en_splitting_tight" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
  161. <analyzer type="index">
  162. <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  163. <filter class="solr.SynonymGraphFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/>
  164. <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
  165. <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/>
  166. <filter class="solr.LowerCaseFilterFactory"/>
  167. <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  168. <filter class="solr.EnglishMinimalStemFilterFactory"/>
  169. <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  170. <filter class="solr.FlattenGraphFilterFactory" />
  171. </analyzer>
  172. <analyzer type="query">
  173. <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  174. <filter class="solr.SynonymGraphFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/>
  175. <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
  176. <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/>
  177. <filter class="solr.LowerCaseFilterFactory"/>
  178. <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
  179. <filter class="solr.EnglishMinimalStemFilterFactory"/>
  180. <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  181. </analyzer>
  182. </fieldType>
  <!-- text_es (Spanish): standard tokenization, lowercasing, stop words from the snowball-format file
       lang/stopwords_es.txt, then Spanish light stemming. -->
  183. <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
  184. <analyzer>
  185. <tokenizer class="solr.StandardTokenizerFactory"/>
  186. <filter class="solr.LowerCaseFilterFactory"/>
  187. <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_es.txt" ignoreCase="true"/>
  188. <filter class="solr.SpanishLightStemFilterFactory"/>
  189. </analyzer>
  190. </fieldType>
  <!-- text_eu (Basque): standard tokenization, lowercasing, stop words from lang/stopwords_eu.txt,
       then Snowball Basque stemming. -->
  191. <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
  192. <analyzer>
  193. <tokenizer class="solr.StandardTokenizerFactory"/>
  194. <filter class="solr.LowerCaseFilterFactory"/>
  195. <filter class="solr.StopFilterFactory" words="lang/stopwords_eu.txt" ignoreCase="true"/>
  196. <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
  197. </analyzer>
  198. </fieldType>
  <!-- text_fa (Persian): a Persian char filter runs before standard tokenization; tokens are then
       lowercased, passed through Arabic and Persian normalization, and filtered against
       lang/stopwords_fa.txt. No stemmer is configured. -->
  199. <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
  200. <analyzer>
  201. <charFilter class="solr.PersianCharFilterFactory"/>
  202. <tokenizer class="solr.StandardTokenizerFactory"/>
  203. <filter class="solr.LowerCaseFilterFactory"/>
  204. <filter class="solr.ArabicNormalizationFilterFactory"/>
  205. <filter class="solr.PersianNormalizationFilterFactory"/>
  206. <filter class="solr.StopFilterFactory" words="lang/stopwords_fa.txt" ignoreCase="true"/>
  207. </analyzer>
  208. </fieldType>
  <!-- text_fi (Finnish): standard tokenization, lowercasing, stop words from the snowball-format file
       lang/stopwords_fi.txt, then Snowball Finnish stemming. -->
  209. <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
  210. <analyzer>
  211. <tokenizer class="solr.StandardTokenizerFactory"/>
  212. <filter class="solr.LowerCaseFilterFactory"/>
  213. <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fi.txt" ignoreCase="true"/>
  214. <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
  215. </analyzer>
  216. </fieldType>
  <!-- text_fr (French): standard tokenization, elision of the contractions listed in
       lang/contractions_fr.txt, lowercasing, stop words from the snowball-format file
       lang/stopwords_fr.txt, then French light stemming. -->
  217. <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
  218. <analyzer>
  219. <tokenizer class="solr.StandardTokenizerFactory"/>
  220. <filter class="solr.ElisionFilterFactory" articles="lang/contractions_fr.txt" ignoreCase="true"/>
  221. <filter class="solr.LowerCaseFilterFactory"/>
  222. <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fr.txt" ignoreCase="true"/>
  223. <filter class="solr.FrenchLightStemFilterFactory"/>
  224. </analyzer>
  225. </fieldType>
  <!-- text_ga (Irish): standard tokenization, elision of the contractions in lang/contractions_ga.txt,
       a first stop filter that drops the entries of lang/hyphenations_ga.txt, Irish-specific
       lowercasing, a second stop filter using lang/stopwords_ga.txt, then Snowball Irish stemming. -->
  226. <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
  227. <analyzer>
  228. <tokenizer class="solr.StandardTokenizerFactory"/>
  229. <filter class="solr.ElisionFilterFactory" articles="lang/contractions_ga.txt" ignoreCase="true"/>
  230. <filter class="solr.StopFilterFactory" words="lang/hyphenations_ga.txt" ignoreCase="true"/>
  231. <filter class="solr.IrishLowerCaseFilterFactory"/>
  232. <filter class="solr.StopFilterFactory" words="lang/stopwords_ga.txt" ignoreCase="true"/>
  233. <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
  234. </analyzer>
  235. </fieldType>
  <!-- text_general: language-neutral text, multi-valued by default at the type level. Both chains use
       standard tokenization, stop words from stopwords.txt and lowercasing; the query chain also
       expands synonyms from synonyms.txt between the stop filter and lowercasing. This type backs the
       "content" and "_text_" fields as well as the attr_*, *_txt and *_t dynamic fields. -->
  236. <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
  237. <analyzer type="index">
  238. <tokenizer class="solr.StandardTokenizerFactory"/>
  239. <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
  240. <filter class="solr.LowerCaseFilterFactory"/>
  241. </analyzer>
  242. <analyzer type="query">
  243. <tokenizer class="solr.StandardTokenizerFactory"/>
  244. <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
  245. <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
  246. <filter class="solr.LowerCaseFilterFactory"/>
  247. </analyzer>
  248. </fieldType>
  <!-- text_general_rev: like text_general, but the index chain ends with ReversedWildcardFilter, which
       indexes reversed token forms alongside the originals (withOriginal="true"). In the query chain
       synonyms are expanded before the stop filter. Unlike text_general, this type does not declare
       multiValued. -->
  249. <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
  250. <analyzer type="index">
  251. <tokenizer class="solr.StandardTokenizerFactory"/>
  252. <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
  253. <filter class="solr.LowerCaseFilterFactory"/>
  254. <filter class="solr.ReversedWildcardFilterFactory" maxPosQuestion="2" maxFractionAsterisk="0.33" maxPosAsterisk="3" withOriginal="true"/>
  255. </analyzer>
  256. <analyzer type="query">
  257. <tokenizer class="solr.StandardTokenizerFactory"/>
  258. <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
  259. <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
  260. <filter class="solr.LowerCaseFilterFactory"/>
  261. </analyzer>
  262. </fieldType>
  <!-- text_gl (Galician): standard tokenization, lowercasing, stop words from lang/stopwords_gl.txt,
       then Galician stemming. -->
  263. <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
  264. <analyzer>
  265. <tokenizer class="solr.StandardTokenizerFactory"/>
  266. <filter class="solr.LowerCaseFilterFactory"/>
  267. <filter class="solr.StopFilterFactory" words="lang/stopwords_gl.txt" ignoreCase="true"/>
  268. <filter class="solr.GalicianStemFilterFactory"/>
  269. </analyzer>
  270. </fieldType>
  <!-- text_hi (Hindi): standard tokenization, lowercasing, Indic then Hindi normalization, stop words
       from lang/stopwords_hi.txt, then Hindi stemming. -->
  271. <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
  272. <analyzer>
  273. <tokenizer class="solr.StandardTokenizerFactory"/>
  274. <filter class="solr.LowerCaseFilterFactory"/>
  275. <filter class="solr.IndicNormalizationFilterFactory"/>
  276. <filter class="solr.HindiNormalizationFilterFactory"/>
  277. <filter class="solr.StopFilterFactory" words="lang/stopwords_hi.txt" ignoreCase="true"/>
  278. <filter class="solr.HindiStemFilterFactory"/>
  279. </analyzer>
  280. </fieldType>
  <!-- text_hu (Hungarian): standard tokenization, lowercasing, stop words from the snowball-format file
       lang/stopwords_hu.txt, then Snowball Hungarian stemming. -->
  281. <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
  282. <analyzer>
  283. <tokenizer class="solr.StandardTokenizerFactory"/>
  284. <filter class="solr.LowerCaseFilterFactory"/>
  285. <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_hu.txt" ignoreCase="true"/>
  286. <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
  287. </analyzer>
  288. </fieldType>
  <!-- text_hy (Armenian): standard tokenization, lowercasing, stop words from lang/stopwords_hy.txt,
       then Snowball Armenian stemming. -->
  289. <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
  290. <analyzer>
  291. <tokenizer class="solr.StandardTokenizerFactory"/>
  292. <filter class="solr.LowerCaseFilterFactory"/>
  293. <filter class="solr.StopFilterFactory" words="lang/stopwords_hy.txt" ignoreCase="true"/>
  294. <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
  295. </analyzer>
  296. </fieldType>
  <!-- text_id (Indonesian): standard tokenization, lowercasing, stop words from lang/stopwords_id.txt,
       then Indonesian stemming with derivational stemming enabled. -->
  297. <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
  298. <analyzer>
  299. <tokenizer class="solr.StandardTokenizerFactory"/>
  300. <filter class="solr.LowerCaseFilterFactory"/>
  301. <filter class="solr.StopFilterFactory" words="lang/stopwords_id.txt" ignoreCase="true"/>
  302. <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
  303. </analyzer>
  304. </fieldType>
  <!-- text_it (Italian): standard tokenization, elision of the contractions listed in
       lang/contractions_it.txt, lowercasing, stop words from the snowball-format file
       lang/stopwords_it.txt, then Italian light stemming. -->
  305. <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
  306. <analyzer>
  307. <tokenizer class="solr.StandardTokenizerFactory"/>
  308. <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt" ignoreCase="true"/>
  309. <filter class="solr.LowerCaseFilterFactory"/>
  310. <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_it.txt" ignoreCase="true"/>
  311. <filter class="solr.ItalianLightStemFilterFactory"/>
  312. </analyzer>
  313. </fieldType>
  <!-- text_ja (Japanese): morphological tokenization in "search" mode, base-form reduction, removal of
       tokens by the part-of-speech tags in lang/stoptags_ja.txt, CJK width normalization, stop words
       from lang/stopwords_ja.txt, katakana stemming (minimum length 4), then lowercasing. Automatic
       phrase queries are disabled for this type. -->
  314. <fieldType name="text_ja" class="solr.TextField" autoGeneratePhraseQueries="false" positionIncrementGap="100">
  315. <analyzer>
  316. <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
  317. <filter class="solr.JapaneseBaseFormFilterFactory"/>
  318. <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt"/>
  319. <filter class="solr.CJKWidthFilterFactory"/>
  320. <filter class="solr.StopFilterFactory" words="lang/stopwords_ja.txt" ignoreCase="true"/>
  321. <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
  322. <filter class="solr.LowerCaseFilterFactory"/>
  323. </analyzer>
  324. </fieldType>
  <!-- text_ko (Korean): Korean tokenization with decompoundMode="discard" and no unigram output for
       unknown words, followed by part-of-speech stop filtering, reading-form conversion and
       lowercasing. -->
  325. <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100">
  326. <analyzer>
  327. <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
  328. <filter class="solr.KoreanPartOfSpeechStopFilterFactory" />
  329. <filter class="solr.KoreanReadingFormFilterFactory" />
  330. <filter class="solr.LowerCaseFilterFactory" />
  331. </analyzer>
  332. </fieldType>
  <!-- text_lv (Latvian): standard tokenization, lowercasing, stop words from lang/stopwords_lv.txt,
       then Latvian stemming. -->
  333. <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
  334. <analyzer>
  335. <tokenizer class="solr.StandardTokenizerFactory"/>
  336. <filter class="solr.LowerCaseFilterFactory"/>
  337. <filter class="solr.StopFilterFactory" words="lang/stopwords_lv.txt" ignoreCase="true"/>
  338. <filter class="solr.LatvianStemFilterFactory"/>
  339. </analyzer>
  340. </fieldType>
  <!-- text_nl (Dutch): standard tokenization, lowercasing, stop words from the snowball-format file
       lang/stopwords_nl.txt, stem overrides from the dictionary lang/stemdict_nl.txt, then Snowball
       Dutch stemming. -->
  341. <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
  342. <analyzer>
  343. <tokenizer class="solr.StandardTokenizerFactory"/>
  344. <filter class="solr.LowerCaseFilterFactory"/>
  345. <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_nl.txt" ignoreCase="true"/>
  346. <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
  347. <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
  348. </analyzer>
  349. </fieldType>
  <!-- text_no (Norwegian): standard tokenization, lowercasing, stop words from the snowball-format file
       lang/stopwords_no.txt, then Snowball Norwegian stemming. -->
  350. <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
  351. <analyzer>
  352. <tokenizer class="solr.StandardTokenizerFactory"/>
  353. <filter class="solr.LowerCaseFilterFactory"/>
  354. <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_no.txt" ignoreCase="true"/>
  355. <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
  356. </analyzer>
  357. </fieldType>
  <!-- text_pt (Portuguese): standard tokenization, lowercasing, stop words from the snowball-format file
       lang/stopwords_pt.txt, then Portuguese light stemming. -->
  358. <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
  359. <analyzer>
  360. <tokenizer class="solr.StandardTokenizerFactory"/>
  361. <filter class="solr.LowerCaseFilterFactory"/>
  362. <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_pt.txt" ignoreCase="true"/>
  363. <filter class="solr.PortugueseLightStemFilterFactory"/>
  364. </analyzer>
  365. </fieldType>
  <!-- text_ro (Romanian): standard tokenization, lowercasing, stop words from lang/stopwords_ro.txt,
       then Snowball Romanian stemming. -->
  366. <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
  367. <analyzer>
  368. <tokenizer class="solr.StandardTokenizerFactory"/>
  369. <filter class="solr.LowerCaseFilterFactory"/>
  370. <filter class="solr.StopFilterFactory" words="lang/stopwords_ro.txt" ignoreCase="true"/>
  371. <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
  372. </analyzer>
  373. </fieldType>
  <!-- text_ru (Russian): standard tokenization, lowercasing, stop words from the snowball-format file
       lang/stopwords_ru.txt, then Snowball Russian stemming. -->
  374. <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
  375. <analyzer>
  376. <tokenizer class="solr.StandardTokenizerFactory"/>
  377. <filter class="solr.LowerCaseFilterFactory"/>
  378. <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_ru.txt" ignoreCase="true"/>
  379. <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
  380. </analyzer>
  381. </fieldType>
  <!-- text_sv (Swedish): standard tokenization, lowercasing, stop words from the snowball-format file
       lang/stopwords_sv.txt, then Snowball Swedish stemming. -->
  382. <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
  383. <analyzer>
  384. <tokenizer class="solr.StandardTokenizerFactory"/>
  385. <filter class="solr.LowerCaseFilterFactory"/>
  386. <filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_sv.txt" ignoreCase="true"/>
  387. <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
  388. </analyzer>
  389. </fieldType>
  <!-- text_th (Thai): Thai-specific tokenization, lowercasing, then stop words from
       lang/stopwords_th.txt. No stemmer is configured. -->
  390. <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
  391. <analyzer>
  392. <tokenizer class="solr.ThaiTokenizerFactory"/>
  393. <filter class="solr.LowerCaseFilterFactory"/>
  394. <filter class="solr.StopFilterFactory" words="lang/stopwords_th.txt" ignoreCase="true"/>
  395. </analyzer>
  396. </fieldType>
  <!-- text_tr (Turkish): standard tokenization, Turkish-specific lowercasing, stop words from
       lang/stopwords_tr.txt (matched case-sensitively, after lowercasing), then Snowball Turkish
       stemming. -->
  397. <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
  398. <analyzer>
  399. <tokenizer class="solr.StandardTokenizerFactory"/>
  400. <filter class="solr.TurkishLowerCaseFilterFactory"/>
  401. <filter class="solr.StopFilterFactory" words="lang/stopwords_tr.txt" ignoreCase="false"/>
  402. <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
  403. </analyzer>
  404. </fieldType>
  <!-- text_ws: splits on whitespace only; no lowercasing, stop words or stemming, so matching is
       case-sensitive on the whitespace-delimited tokens. -->
  405. <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
  406. <analyzer>
  407. <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  408. </analyzer>
  409. </fieldType>
  <!-- text_email_url: tokenizes with the URL/e-mail aware UAX29 tokenizer and then keeps only the token
       types listed in email_url_types.txt (useWhitelist="true"); all other tokens are dropped. -->
  410. <fieldType name="text_email_url" class="solr.TextField">
  411. <analyzer>
  412. <tokenizer class="solr.UAX29URLEmailTokenizerFactory"/>
  413. <filter class="solr.TypeTokenFilterFactory" types="email_url_types.txt" useWhitelist="true"/>
  414. </analyzer>
  415. </fieldType>
  <!-- text_shingles: indexes three-word phrases ("shingles") taken from the text.
       Index chain: standard tokenization; drop tokens shorter than 2 or longer than 18 characters;
       lowercase; blank out tokens that contain no a-z character at all; build shingles of exactly three
       tokens joined by a space, with "*" standing in for removed positions and no unigram output; blank
       out every shingle that contains a "*" filler; trim; finally drop the tokens that ended up empty.
       Query chain: the whole query string is one lowercased token, so a query matches a shingle only
       when it equals the complete three-word phrase. -->
  416. <fieldType name="text_shingles" class="solr.TextField" positionIncrementGap="100" multiValued="true">
  417. <analyzer type="index">
  418. <tokenizer class="solr.StandardTokenizerFactory"/>
  419. <!-- <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="false" /> -->
  420. <filter class="solr.LengthFilterFactory" min="2" max="18"/>
  421. <filter class="solr.LowerCaseFilterFactory"/>
  422. <filter class="solr.PatternReplaceFilterFactory" pattern="(^[^a-z]+$)" replacement="" replace="all"/>
  423. <filter class="solr.ShingleFilterFactory" minShingleSize="3" maxShingleSize="3"
  424. outputUnigrams="false" outputUnigramsIfNoShingles="false" tokenSeparator=" " fillerToken="*"/>
  425. <filter class="solr.PatternReplaceFilterFactory" pattern="(.*[\*].*)" replacement=""/>
  426. <filter class="solr.TrimFilterFactory"/>
  427. <!-- The PatternReplaceFilterFactory steps above may have reduced a token to an empty string; the length filter below removes such tokens. -->
  428. <filter class="solr.LengthFilterFactory" min="1" max="100"/>
  429. </analyzer>
  430. <analyzer type="query">
  431. <tokenizer class="solr.KeywordTokenizerFactory"/>
  432. <filter class="solr.LowerCaseFilterFactory"/>
  433. </analyzer>
  434. </fieldType>
  <!-- Explicitly declared fields.
       id: required single-valued string (the schema's uniqueKey). _version_: long used by Solr internally.
       content_type, doc_type, title, language: exact-match string metadata, indexed and stored.
       content: the searchable full text, single-valued, analysed as text_general.
       text_shingles and _text_: index-only fields (stored="false") that are filled by the copyField
       rules of this schema rather than supplied directly. -->
  435. <field name="id" type="string" multiValued="false" indexed="true" required="true" stored="true"/>
  436. <field name="_version_" type="plong" indexed="true" stored="true"/>
  437. <field name="content_type" type="string" indexed="true" stored="true"/>
  438. <field name="doc_type" type="string" indexed="true" stored="true"/>
  439. <field name="title" type="string" indexed="true" stored="true"/>
  440. <field name="language" type="string" indexed="true" stored="true"/>
  441. <field name="content" type="text_general" multiValued="false" indexed="true" stored="true"/>
  442. <field name="text_shingles" type="text_shingles" indexed="true" stored="false"/>
  443. <field name="_text_" type="text_general" multiValued="true" indexed="true" stored="false"/>
  <!-- Dynamic fields, part 1: special-purpose suffixes and prefixes (English splitting variants, path
       hierarchies, reversed-wildcard text, phonetic, lowercase, CJK, random sort).
       *_coordinate holds the sub-fields of the "location" type (its subFieldSuffix) and is not stored.
       ignored_* maps to the "ignored" type, so matching input is neither indexed nor stored. -->
  444. <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/>
  445. <dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/>
  446. <dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/>
  447. <dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/>
  448. <dynamicField name="*_coordinate" type="pdouble" indexed="true" stored="false"/>
  449. <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
  450. <dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/>
  451. <dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/>
  452. <dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/>
  453. <dynamicField name="*_txt_cjk" type="text_cjk" indexed="true" stored="true"/>
  454. <dynamicField name="random_*" type="random"/>
  <!-- Dynamic fields, part 2: one *_txt_<lang> suffix per language-specific text type defined in this
       schema (text_en, text_ar, ... text_tr). All are indexed and stored. -->
  455. <dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/>
  456. <dynamicField name="*_txt_ar" type="text_ar" indexed="true" stored="true"/>
  457. <dynamicField name="*_txt_bg" type="text_bg" indexed="true" stored="true"/>
  458. <dynamicField name="*_txt_ca" type="text_ca" indexed="true" stored="true"/>
  459. <dynamicField name="*_txt_cz" type="text_cz" indexed="true" stored="true"/>
  460. <dynamicField name="*_txt_da" type="text_da" indexed="true" stored="true"/>
  461. <dynamicField name="*_txt_de" type="text_de" indexed="true" stored="true"/>
  462. <dynamicField name="*_txt_el" type="text_el" indexed="true" stored="true"/>
  463. <dynamicField name="*_txt_es" type="text_es" indexed="true" stored="true"/>
  464. <dynamicField name="*_txt_eu" type="text_eu" indexed="true" stored="true"/>
  465. <dynamicField name="*_txt_fa" type="text_fa" indexed="true" stored="true"/>
  466. <dynamicField name="*_txt_fi" type="text_fi" indexed="true" stored="true"/>
  467. <dynamicField name="*_txt_fr" type="text_fr" indexed="true" stored="true"/>
  468. <dynamicField name="*_txt_ga" type="text_ga" indexed="true" stored="true"/>
  469. <dynamicField name="*_txt_gl" type="text_gl" indexed="true" stored="true"/>
  470. <dynamicField name="*_txt_hi" type="text_hi" indexed="true" stored="true"/>
  471. <dynamicField name="*_txt_hu" type="text_hu" indexed="true" stored="true"/>
  472. <dynamicField name="*_txt_hy" type="text_hy" indexed="true" stored="true"/>
  473. <dynamicField name="*_txt_id" type="text_id" indexed="true" stored="true"/>
  474. <dynamicField name="*_txt_it" type="text_it" indexed="true" stored="true"/>
  475. <dynamicField name="*_txt_ja" type="text_ja" indexed="true" stored="true"/>
  476. <dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/>
  477. <dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/>
  478. <dynamicField name="*_txt_nl" type="text_nl" indexed="true" stored="true"/>
  479. <dynamicField name="*_txt_no" type="text_no" indexed="true" stored="true"/>
  480. <dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/>
  481. <dynamicField name="*_txt_ro" type="text_ro" indexed="true" stored="true"/>
  482. <dynamicField name="*_txt_ru" type="text_ru" indexed="true" stored="true"/>
  483. <dynamicField name="*_txt_sv" type="text_sv" indexed="true" stored="true"/>
  484. <dynamicField name="*_txt_th" type="text_th" indexed="true" stored="true"/>
  485. <dynamicField name="*_txt_tr" type="text_tr" indexed="true" stored="true"/>
  <!-- Dynamic fields, part 3: generic typed suffixes. Single-letter suffixes (_i, _s, _l, _t, _b, _f,
       _d, _p, _c) map to single-valued types, and the forms ending in "s" (_is, _ss, _ls, _bs, _fs,
       _ds, _dts) to multi-valued ones. *_l_ns and *_s_ns are the index-only sub-fields named by the
       "currency" type (amountLongSuffix / codeStrSuffix). attr_* and *_txt are general text. -->
  486. <dynamicField name="*_point" type="point" indexed="true" stored="true"/>
  487. <dynamicField name="*_srpt" type="location_rpt" indexed="true" stored="true"/>
  488. <dynamicField name="attr_*" type="text_general" multiValued="true" indexed="true" stored="true"/>
  489. <dynamicField name="*_l_ns" type="plong" indexed="true" stored="false"/>
  490. <dynamicField name="*_s_ns" type="string" indexed="true" stored="false"/>
  491. <dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/>
  492. <dynamicField name="*_dts" type="pdate" multiValued="true" indexed="true" stored="true"/>
  493. <dynamicField name="*_is" type="pints" indexed="true" stored="true"/>
  494. <dynamicField name="*_ss" type="strings" indexed="true" stored="true"/>
  495. <dynamicField name="*_ls" type="plongs" indexed="true" stored="true"/>
  496. <dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/>
  497. <dynamicField name="*_fs" type="pfloats" indexed="true" stored="true"/>
  498. <dynamicField name="*_ds" type="pdoubles" indexed="true" stored="true"/>
  499. <dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/>
  500. <dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/>
  501. <dynamicField name="*_i" type="pint" indexed="true" stored="true"/>
  502. <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
  503. <dynamicField name="*_l" type="plong" indexed="true" stored="true"/>
  504. <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
  505. <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
  506. <dynamicField name="*_f" type="pfloat" indexed="true" stored="true"/>
  507. <dynamicField name="*_d" type="pdouble" indexed="true" stored="true"/>
  508. <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
  509. <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
  <!-- Copy rules: "content" is also fed into the text_shingles field for three-word phrase indexing, and
       every incoming field (source="*") is copied into the catch-all "_text_" field. -->
  510. <copyField source="content" dest="text_shingles"/>
  511. <copyField source="*" dest="_text_"/>
  512. <!-- ADDED BY SIMON BOWIE 2022-04-04 -->
  <!-- year: derived from "content" through a copyField. The "year" type emits as its only tokens the
       four digits captured by the pattern below: text starting with "=D", followed by three further
       whitespace-separated chunks, then a four-digit number.
       NOTE(review): presumably this targets the date line of each patent record; confirm against the
       source RTF data.
       The field is index-only (stored="false"). A stored copyField destination keeps a verbatim copy of
       the source value, so storing it would duplicate the whole "content" text and never return the
       extracted year. This matches the other copyField destinations, text_shingles and _text_. -->
  513. <copyField source="content" dest="year"/>
  514. <field name="year" type="year" indexed="true" stored="false"/>
  515. <fieldType name="year" class="solr.TextField" positionIncrementGap="100">
  516. <analyzer>
  517. <tokenizer class="solr.PatternTokenizerFactory" pattern="=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})" group="1" />
  518. </analyzer>
  519. </fieldType>
  520. <!-- END -->
  521. <!-- ADDED BY SIMON BOWIE 2022-08-14 -->
  <!-- country: derived from "content" through a copyField. The "country" type emits as its only tokens
       the two word characters captured by the pattern below: text starting with "FT=D", then a single
       whitespace character, then the two characters that are captured.
       NOTE(review): presumably a two-letter country code at the start of the patent number; confirm
       against the source RTF data.
       The field is index-only (stored="false"). A stored copyField destination keeps a verbatim copy of
       the source value, so storing it would duplicate the whole "content" text and never return the
       extracted code. This matches the other copyField destinations, text_shingles and _text_. -->
  522. <copyField source="content" dest="country"/>
  523. <field name="country" type="country" indexed="true" stored="false"/>
  524. <fieldType name="country" class="solr.TextField" positionIncrementGap="100">
  525. <analyzer>
  526. <tokenizer class="solr.PatternTokenizerFactory" pattern="FT=D[^\s]*\s(\w{2})" group="1" />
  527. </analyzer>
  528. </fieldType>
  529. <!-- END -->
  530. </schema>