# sphinx config

source pages
{
    type = mysql

    # whether to strip HTML
    # values can be 0 (don't strip) or 1 (do strip)
    # WARNING, only works with mysql source for now
    # WARNING, should work ok for PERFECTLY formed XHTML for now
    # WARNING, may misbehave on malformed everyday HTML
    # optional, default is 0
    strip_html = 1

    # which HTML attributes to index when stripping HTML
    # format is as follows:
    # index_html_attrs = img=alt,title; a=title;

    sql_host = <%= sphinx_db_host %>
    sql_user = <%= sphinx_db_user %>
    sql_pass = <%= sphinx_db_pass %>
    sql_db   = <%= sphinx_db_name %>
    sql_port = <%= sphinx_db_port %> # optional, default is 3306

    sql_query_pre = SET NAMES UTF8
    sql_query_pre = SET SESSION query_cache_type=OFF
    # record that a full rebuild of "pages" has started on this host
    sql_query_pre = INSERT INTO indexer_status (id, started_at, status, index_name, hostname) VALUES (10, NOW(), 'indexing', 'pages', '<%= sphinx_hostname %>') \
        ON DUPLICATE KEY UPDATE started_at = NOW(), status = 'indexing'

    sql_query = SELECT id, user_id, language, UNIX_TIMESTAMP(created_at) AS created_at, UNIX_TIMESTAMP(updated_at) AS updated_at, body, title FROM pages WHERE id >= $start AND id <= $end

    sql_query_range = SELECT MIN(id), MAX(id) FROM pages WHERE type = 'Article'
    sql_range_step = 1000

    sql_query_post = UPDATE indexer_status SET updated_at = NOW(), status = 'updated' WHERE index_name = 'pages' AND hostname = '<%= sphinx_hostname %>'

    sql_attr_uint = user_id
    sql_attr_timestamp = created_at
    sql_attr_timestamp = updated_at
}

source pages_delta : pages
{
    # clear and reset the inherited sql_query_pre list
    sql_query_pre =
    sql_query_pre = SET NAMES UTF8
    sql_query_pre = SET SESSION query_cache_type=OFF
    sql_query_pre = INSERT INTO indexer_status (id, started_at, status, index_name, hostname) VALUES (11, NOW(), 'indexing', 'pages_delta', '<%= sphinx_hostname %>') \
        ON DUPLICATE KEY UPDATE started_at = NOW(), status = 'indexing'

    # index only the rows touched since the delta's last recorded run
    sql_query = SELECT id, user_id, language, UNIX_TIMESTAMP(created_at) AS created_at, UNIX_TIMESTAMP(updated_at) AS updated_at, body, title \
        FROM pages \
        WHERE updated_at >= (SELECT updated_at FROM indexer_status WHERE id = 11)

    sql_query_post =
    sql_query_post = UPDATE indexer_status SET updated_at = NOW(), status = 'updated' WHERE index_name = 'pages_delta' AND hostname = '<%= sphinx_hostname %>'

    # the delta is small, so disable the ranged fetching inherited from pages
    sql_query_range =
    sql_range_step =
}

index pages
{
    source = pages
    path = <%= sphinx_index_root %>/pages
    docinfo = extern
    morphology = stem_en
    stopwords = <%= sphinx_conf_path %>/stopwords.txt
    min_word_len = 1
    charset_type = utf-8
    min_prefix_len = 0
    min_infix_len = 0
}

index pages_delta : pages
{
    source = pages_delta
    path = <%= sphinx_index_root %>/pages_delta
}
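
# A note on driving the two indexes above: the full "pages" index is rebuilt
# infrequently, while "pages_delta" is reindexed often and only picks up rows
# whose updated_at is newer than the timestamp recorded in indexer_status
# (id = 11). A minimal sketch of the matching cron jobs follows; the rendered
# config path and the schedule are assumptions, and --merge requires
# Sphinx 0.9.8 or later (on older versions, just rebuild "pages" instead):
#
#   # every 5 minutes: rebuild the delta and hot-swap it into searchd
#   */5 * * * *  indexer --config /usr/local/etc/sphinx.conf --rotate pages_delta
#
#   # nightly: fold the delta back into the main index
#   0 3 * * *    indexer --config /usr/local/etc/sphinx.conf --merge pages pages_delta --rotate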
#############################################################################
## indexer settings
#############################################################################

indexer
{
    # memory limit
    #
    # may be specified in bytes (no postfix), kilobytes (mem_limit=1000K)
    # or megabytes (mem_limit=10M)
    #
    # will grow if set unacceptably low
    # will warn if set too low, as that may hurt performance
    #
    # optional, default is 32M
    mem_limit = 64M
}

#############################################################################
## searchd settings
#############################################################################

searchd
{
    # IP address on which the search daemon will bind and accept
    # incoming network requests
    #
    # optional, default is to listen on all addresses,
    # i.e. address = 0.0.0.0
    #
    # address = <%= sphinx_host %>
    # address = 192.168.0.1

    # port on which the search daemon will listen
    port = <%= sphinx_port %>

    # log file
    # searchd run info is logged here
    log = <%= sphinx_log_root %>/searchd.log

    # query log file
    # all search queries are logged here
    query_log = <%= sphinx_log_root %>/query.log

    # client read timeout, in seconds
    read_timeout = 5

    # maximum number of children to fork
    # useful to control server load
    max_children = 30

    # file which will contain the searchd process ID
    # used by various external automation scripts
    # MUST be present
    pid_file = <%= sphinx_pid_path %>

    # maximum number of matches this daemon will ever retrieve
    # from each index and serve to a client
    #
    # this parameter affects per-client memory and CPU usage
    # (16+ bytes per match) in the match sorting phase; blindly raising
    # it to 1 million is definitely NOT recommended
    #
    # starting from 0.9.7, it can be decreased on the fly through
    # the corresponding API call; increasing it is prohibited to protect
    # against malicious and/or malformed requests
    #
    # default is 1000 (just like with Google)
    max_matches = 1000
}

# --eof--
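
# Quick smoke test once the template has been rendered and searchd has been
# started with this config. The config path is an assumption; "search" is
# the command-line client bundled with Sphinx 0.9.x:
#
#   searchd --config /usr/local/etc/sphinx.conf
#   search --config /usr/local/etc/sphinx.conf --index pages "some words"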