[postgis-users] Query slow down, never completes
Andreas Forø Tollefsen
andreasft at gmail.com
Wed Oct 12 00:59:27 PDT 2011
Postgresql.conf
# -----------------------------
# PostgreSQL configuration file
# -----------------------------
#
# This file consists of lines of the form:
#
# name = value
#
# (The "=" is optional.) Whitespace may be used. Comments are introduced with
# "#" anywhere on a line. The complete list of parameter names and allowed
# values can be found in the PostgreSQL documentation.
#
# The commented-out settings shown in this file represent the default values.
# Re-commenting a setting is NOT sufficient to revert it to the default value;
# you need to reload the server.
#
# This file is read on server startup and when the server receives a SIGHUP
# signal. If you edit the file on a running system, you have to SIGHUP the
# server for the changes to take effect, or use "pg_ctl reload". Some
# parameters, which are marked below, require a server shutdown and restart to
# take effect.
#
# Any parameter can also be given as a command-line option to the server, e.g.,
# "postgres -c log_connections=on". Some parameters can be changed at run time
# with the "SET" SQL command.
#
# Memory units: kB = kilobytes Time units: ms = milliseconds
# MB = megabytes s = seconds
# GB = gigabytes min = minutes
# h = hours
# d = days
#------------------------------------------------------------------------------
# FILE LOCATIONS
#------------------------------------------------------------------------------
# The default values of these variables are driven from the -D command-line
# option or PGDATA environment variable, represented here as ConfigDir.
data_directory = '/var/lib/postgresql/8.4/main' # use data in another directory
# (change requires restart)
hba_file = '/etc/postgresql/8.4/main/pg_hba.conf' # host-based authentication file
# (change requires restart)
ident_file = '/etc/postgresql/8.4/main/pg_ident.conf' # ident configuration file
# (change requires restart)
# If external_pid_file is not explicitly set, no extra PID file is written.
external_pid_file = '/var/run/postgresql/8.4-main.pid' # write an extra PID file
# (change requires restart)
#------------------------------------------------------------------------------
# CONNECTIONS AND AUTHENTICATION
#------------------------------------------------------------------------------
# - Connection Settings -
listen_addresses = '*' # what IP address(es) to listen on;
# comma-separated list of addresses;
# defaults to 'localhost', '*' = all
# (change requires restart)
port = 5432 # (change requires restart)
max_connections = 10 # (change requires restart)
# Note: Increasing max_connections costs ~400 bytes of shared memory per
# connection slot, plus lock space (see max_locks_per_transaction).
#superuser_reserved_connections = 3 # (change requires restart)
unix_socket_directory = '/var/run/postgresql' # (change requires restart)
#unix_socket_group = '' # (change requires restart)
#unix_socket_permissions = 0777 # begin with 0 to use octal notation
# (change requires restart)
#bonjour_name = '' # defaults to the computer name
# (change requires restart)
# - Security and Authentication -
#authentication_timeout = 1min # 1s-600s
ssl = true # (change requires restart)
#ssl_ciphers = 'ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH' # allowed SSL ciphers
# (change requires restart)
#ssl_renegotiation_limit = 512MB # amount of data between renegotiations
#password_encryption = on
#db_user_namespace = off
# Kerberos and GSSAPI
#krb_server_keyfile = ''
#krb_srvname = 'postgres' # (Kerberos only)
#krb_caseins_users = off
# - TCP Keepalives -
# see "man 7 tcp" for details
#tcp_keepalives_idle = 0 # TCP_KEEPIDLE, in seconds;
# 0 selects the system default
#tcp_keepalives_interval = 0 # TCP_KEEPINTVL, in seconds;
# 0 selects the system default
#tcp_keepalives_count = 0 # TCP_KEEPCNT;
# 0 selects the system default
#------------------------------------------------------------------------------
# RESOURCE USAGE (except WAL)
#------------------------------------------------------------------------------
# - Memory -
shared_buffers = 2200MB # min 128kB
# (change requires restart)
temp_buffers = 256MB # min 800kB
#max_prepared_transactions = 0 # zero disables the feature
# (change requires restart)
# Note: Increasing max_prepared_transactions costs ~600 bytes of shared memory
# per transaction slot, plus lock space (see max_locks_per_transaction).
# It is not advisable to set max_prepared_transactions nonzero unless you
# actively intend to use prepared transactions.
work_mem = 512MB # min 64kB
maintenance_work_mem = 128MB # min 1MB
#max_stack_depth = 2MB # min 100kB
# - Kernel Resource Usage -
#max_files_per_process = 1000 # min 25
# (change requires restart)
#shared_preload_libraries = '' # (change requires restart)
# - Cost-Based Vacuum Delay -
#vacuum_cost_delay = 0ms # 0-100 milliseconds
#vacuum_cost_page_hit = 1 # 0-10000 credits
#vacuum_cost_page_miss = 10 # 0-10000 credits
#vacuum_cost_page_dirty = 20 # 0-10000 credits
#vacuum_cost_limit = 200 # 1-10000 credits
# - Background Writer -
#bgwriter_delay = 200ms # 10-10000ms between rounds
#bgwriter_lru_maxpages = 100 # 0-1000 max buffers written/round
#bgwriter_lru_multiplier = 2.0 # 0-10.0 multiplier on buffers scanned/round
# - Asynchronous Behavior -
#effective_io_concurrency = 1 # 1-1000. 0 disables prefetching
#------------------------------------------------------------------------------
# WRITE AHEAD LOG
#------------------------------------------------------------------------------
# - Settings -
fsync = off # turns forced synchronization on or off
#synchronous_commit = on # immediate fsync at commit
#wal_sync_method = fsync # the default is the first option
# supported by the operating system:
# open_datasync
# fdatasync (default on Linux)
# fsync
# fsync_writethrough
# open_sync
#full_page_writes = on # recover from partial page writes
wal_buffers = 640kB # min 32kB
# (change requires restart)
#wal_writer_delay = 200ms # 1-10000 milliseconds
commit_delay = 20 # range 0-100000, in microseconds
commit_siblings = 200 # range 1-1000
# - Checkpoints -
checkpoint_segments = 10 # in logfile segments, min 1, 16MB each
checkpoint_timeout = 30s # range 30s-1h
checkpoint_completion_target = 0.5 # checkpoint target duration, 0.0 - 1.0
#checkpoint_warning = 30s # 0 disables
# - Archiving -
#archive_mode = off # allows archiving to be done
# (change requires restart)
#archive_command = '' # command to use to archive a logfile segment
#archive_timeout = 0 # force a logfile segment switch after this
# number of seconds; 0 disables
#------------------------------------------------------------------------------
# QUERY TUNING
#------------------------------------------------------------------------------
# - Planner Method Configuration -
#enable_bitmapscan = on
#enable_hashagg = on
#enable_hashjoin = on
#enable_indexscan = on
#enable_mergejoin = on
#enable_nestloop = on
#enable_seqscan = on
#enable_sort = on
#enable_tidscan = on
# - Planner Cost Constants -
#seq_page_cost = 1.0 # measured on an arbitrary scale
#random_page_cost = 4.0 # same scale as above
#cpu_tuple_cost = 0.01 # same scale as above
#cpu_index_tuple_cost = 0.005 # same scale as above
#cpu_operator_cost = 0.0025 # same scale as above
effective_cache_size = 4096MB
# - Genetic Query Optimizer -
#geqo = on
#geqo_threshold = 12
#geqo_effort = 5 # range 1-10
#geqo_pool_size = 0 # selects default based on effort
#geqo_generations = 0 # selects default based on effort
#geqo_selection_bias = 2.0 # range 1.5-2.0
# - Other Planner Options -
#default_statistics_target = 100 # range 1-10000
#constraint_exclusion = partition # on, off, or partition
#cursor_tuple_fraction = 0.1 # range 0.0-1.0
#from_collapse_limit = 8
#join_collapse_limit = 8 # 1 disables collapsing of explicit
# JOIN clauses
#------------------------------------------------------------------------------
# ERROR REPORTING AND LOGGING
#------------------------------------------------------------------------------
# - Where to Log -
log_destination = 'stderr' # Valid values are combinations of
# stderr, csvlog, syslog and eventlog,
# depending on platform. csvlog
# requires logging_collector to be on.
# This is used when logging to stderr:
logging_collector = on # Enable capturing of stderr and csvlog
# into log files. Required to be on for
# csvlogs.
# (change requires restart)
# These are only used if logging_collector is on:
log_directory = '/var/log/postgresql' # directory where log files are written,
# can be absolute or relative to PGDATA
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log' # log file name pattern,
# can include strftime() escapes
#log_truncate_on_rotation = off # If on, an existing log file of the
# same name as the new log file will be
# truncated rather than appended to.
# But such truncation only occurs on
# time-driven rotation, not on restarts
# or size-driven rotation. Default is
# off, meaning append to existing files
# in all cases.
#log_rotation_age = 1d # Automatic rotation of logfiles will
# happen after that time. 0 disables.
#log_rotation_size = 10MB # Automatic rotation of logfiles will
# happen after that much log output.
# 0 disables.
# These are relevant when logging to syslog:
#syslog_facility = 'LOCAL0'
#syslog_ident = 'postgres'
#silent_mode = off # Run server silently.
# DO NOT USE without syslog or
# logging_collector
# (change requires restart)
# - When to Log -
#client_min_messages = notice # values in order of decreasing detail:
# debug5
# debug4
# debug3
# debug2
# debug1
# log
# notice
# warning
# error
#log_min_messages = warning # values in order of decreasing detail:
# debug5
# debug4
# debug3
# debug2
# debug1
# info
# notice
# warning
# error
# log
# fatal
# panic
#log_error_verbosity = default # terse, default, or verbose messages
#log_min_error_statement = error # values in order of decreasing detail:
# debug5
# debug4
# debug3
# debug2
# debug1
# info
# notice
# warning
# error
# log
# fatal
# panic (effectively off)
#log_min_duration_statement = -1 # -1 is disabled, 0 logs all statements
# and their durations, > 0 logs only
# statements running at least this number
# of milliseconds
# - What to Log -
#debug_print_parse = off
#debug_print_rewritten = off
#debug_print_plan = off
#debug_pretty_print = on
#log_checkpoints = off
#log_connections = off
#log_disconnections = off
#log_duration = off
#log_hostname = off
log_line_prefix = '%t ' # special values:
# %u = user name
# %d = database name
# %r = remote host and port
# %h = remote host
# %p = process ID
# %t = timestamp without milliseconds
# %m = timestamp with milliseconds
# %i = command tag
# %c = session ID
# %l = session line number
# %s = session start timestamp
# %v = virtual transaction ID
# %x = transaction ID (0 if none)
# %q = stop here in non-session
# processes
# %% = '%'
# e.g. '<%u%%%d> '
#log_lock_waits = off # log lock waits >= deadlock_timeout
#log_statement = 'none' # none, ddl, mod, all
#log_temp_files = -1 # log temporary files equal or larger
# than the specified size in kilobytes;
# -1 disables, 0 logs all temp files
#log_timezone = unknown # actually, defaults to TZ environment
# setting
#------------------------------------------------------------------------------
# RUNTIME STATISTICS
#------------------------------------------------------------------------------
# - Query/Index Statistics Collector -
#track_activities = on
#track_counts = on
#track_functions = none # none, pl, all
#track_activity_query_size = 1024
#update_process_title = on
#stats_temp_directory = 'pg_stat_tmp'
# - Statistics Monitoring -
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
#log_statement_stats = off
#------------------------------------------------------------------------------
# AUTOVACUUM PARAMETERS
#------------------------------------------------------------------------------
#autovacuum = on # Enable autovacuum subprocess? 'on'
# requires track_counts to also be on.
#log_autovacuum_min_duration = -1 # -1 disables, 0 logs all actions and
# their durations, > 0 logs only
# actions running at least this number
# of milliseconds.
#autovacuum_max_workers = 3 # max number of autovacuum subprocesses
#autovacuum_naptime = 1min # time between autovacuum runs
#autovacuum_vacuum_threshold = 50 # min number of row updates before
# vacuum
#autovacuum_analyze_threshold = 50 # min number of row updates before
# analyze
#autovacuum_vacuum_scale_factor = 0.2 # fraction of table size before vacuum
#autovacuum_analyze_scale_factor = 0.1 # fraction of table size before analyze
#autovacuum_freeze_max_age = 200000000 # maximum XID age before forced vacuum
# (change requires restart)
#autovacuum_vacuum_cost_delay = 20ms # default vacuum cost delay for
# autovacuum, in milliseconds;
# -1 means use vacuum_cost_delay
#autovacuum_vacuum_cost_limit = -1 # default vacuum cost limit for
# autovacuum, -1 means use
# vacuum_cost_limit
#------------------------------------------------------------------------------
# CLIENT CONNECTION DEFAULTS
#------------------------------------------------------------------------------
# - Statement Behavior -
#search_path = '"$user",public' # schema names
#default_tablespace = '' # a tablespace name, '' uses the default
#temp_tablespaces = '' # a list of tablespace names, '' uses
# only default tablespace
#check_function_bodies = on
#default_transaction_isolation = 'read committed'
#default_transaction_read_only = off
#session_replication_role = 'origin'
#statement_timeout = 0 # in milliseconds, 0 is disabled
#vacuum_freeze_min_age = 50000000
#vacuum_freeze_table_age = 150000000
#xmlbinary = 'base64'
#xmloption = 'content'
# - Locale and Formatting -
datestyle = 'iso, mdy'
#intervalstyle = 'postgres'
#timezone = unknown # actually, defaults to TZ environment
# setting
#timezone_abbreviations = 'Default' # Select the set of available time zone
# abbreviations. Currently, there are
# Default
# Australia
# India
# You can create your own file in
# share/timezonesets/.
#extra_float_digits = 0 # min -15, max 2
#client_encoding = sql_ascii # actually, defaults to database
# encoding
# These settings are initialized by initdb, but they can be changed.
lc_messages = 'en_US.UTF-8' # locale for system error message
# strings
lc_monetary = 'en_US.UTF-8' # locale for monetary formatting
lc_numeric = 'en_US.UTF-8' # locale for number formatting
lc_time = 'en_US.UTF-8' # locale for time formatting
# default configuration for text search
default_text_search_config = 'pg_catalog.english'
# - Other Defaults -
#dynamic_library_path = '$libdir'
#local_preload_libraries = ''
#------------------------------------------------------------------------------
# LOCK MANAGEMENT
#------------------------------------------------------------------------------
#deadlock_timeout = 1s
#max_locks_per_transaction = 64 # min 10
# (change requires restart)
# Note: Each lock table slot uses ~270 bytes of shared memory, and there are
# max_locks_per_transaction * (max_connections + max_prepared_transactions)
# lock table slots.
#------------------------------------------------------------------------------
# VERSION/PLATFORM COMPATIBILITY
#------------------------------------------------------------------------------
# - Previous PostgreSQL Versions -
#add_missing_from = off
#array_nulls = on
#backslash_quote = safe_encoding # on, off, or safe_encoding
#default_with_oids = off
#escape_string_warning = on
#regex_flavor = advanced # advanced, extended, or basic
#sql_inheritance = on
#standard_conforming_strings = off
#synchronize_seqscans = on
# - Other Platforms and Clients -
#transform_null_equals = off
#------------------------------------------------------------------------------
# CUSTOMIZED OPTIONS
#------------------------------------------------------------------------------
#custom_variable_classes = '' # list of custom variable class names
2011/10/12 Andreas Forø Tollefsen <andreasft at gmail.com>
> Yes, I did a version of the query where i terminated the connection in the
> loop after one year, and then reconnected to the server for each year in the
> query.
> This did not change anything, and the query still halted on the same year.
> Like this:
> # For each year calculate the distance to border and insert into the borddist table
> yearlist = range(1946, 2009, 1)
> for x in yearlist:
> db1 = psycopg2.connect("host=192.168.1.186 dbname=priogrid
> user=postgres password=postgres")
> cur = db1.cursor()
> print str(time.ctime())+ " Creating borddist for year "+str(x)+"."
> cur.execute("INSERT INTO borddist(gid, gwcode, gridyear, borddist)
> SELECT a.gid, a.gwcode, "+str(x)+", MIN(ST_Distance(ST_Transform(a.centroid,
> 954010), ST_Transform(b.geom, 954010)))/1000 AS borddist \n" \
> "FROM priogridall_geom a, cshapes b, cshapes c WHERE
> a.gwcode != b.gwcode AND b.gwsyear <= "+str(x)+" AND b.gweyear >= "+str(x)+"
> \n" \
> "and a.gwcode = c.gwcode and st_intersects(b.geom, c.geom)
> AND a.gridyear = "+str(x)+" GROUP BY a.gid, a.gwcode;")
> db1.commit()
> cur.close()
> db1.close()
> db1.commit()
> print str(time.ctime())+ " Done"
> cur.close()
> db1.close()
>
> I also followed your suggestion to not write any data. Just do a select,
> without any select into or insert into.
> The same problem occurred.
> Script:
> cur.execute("SELECT a.gid, a.gwcode, "+str(x)+", "\
> "MIN(ST_Distance(a.centroid, b.geom)) "\
> "FROM priogridall_geom a, cshapes b, cshapes c WHERE
> a.gwcode != b.gwcode AND b.gwsyear <= "+str(x)+" AND b.gweyear >= "+str(x)+"
> "\
> "AND a.gwcode = c.gwcode and ST_Intersects(b.geom, c.geom)
> AND a.gridyear = "+str(x)+" GROUP BY a.gid, a.gwcode;")
> db1.commit()
>
> Thank you very much for looking into this. I have spent over two weeks
> trying to figure this out.
> The only workaround I have found is to run the script for half of the years,
> restart the server ("service postgresql restart"), and then run it for the
> remaining half.
>
> Some memory and cpu information.
>
> Here is how it looks in free -m and top when the script has halted.
> total used free shared buffers cached
> Mem: 5977 5371 605 0 139 4735
> -/+ buffers/cache: 495 5481
> Swap: 6075 1 6074
>
> top - 09:51:07 up 1 day, 18:44, 2 users, load average: 1.88, 1.32, 1.20
> Tasks: 165 total, 2 running, 162 sleeping, 0 stopped, 1 zombie
> Cpu(s): 53.2%us, 1.3%sy, 0.0%ni, 45.5%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
> Mem: 6120848k total, 5505868k used, 614980k free, 143004k buffers
> Swap: 6221820k total, 1468k used, 6220352k free, 4849556k cached
>
> PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
>
>
> 7810 postgres 20 0 2264m 1.0g 1.0g R 100 18.0 988:09.39 postgres
>
>
> 1417 andreas 20 0 163m 39m 14m S 3 0.7 3:27.13 compiz
>
>
> 907 root 20 0 57092 34m 10m S 2 0.6 1:48.73 Xorg
>
>
> 7088 andreas 20 0 93012 15m 10m S 2 0.3 0:02.80 gnome-terminal
>
>
> 10772 andreas 20 0 204m 96m 23m S 2 1.6 0:19.88
> chromium-browse
>
> 1525 andreas 20 0 100m 15m 9548 S 1 0.3 5:11.16
> unity-panel-ser
>
> 10675 andreas 20 0 308m 50m 29m S 1 0.8 0:07.55
> chromium-browse
>
> 1088 root 20 0 9420 1632 964 S 0 0.0 0:07.01 nmbd
>
>
> 1389 andreas 20 0 5552 2740 700 S 0 0.0 1:17.77 dbus-daemon
>
>
> 10825 andreas 20 0 35836 19m 6276 S 0 0.3 0:02.94 idle-python2.6
>
>
> 1 root 20 0 3040 1780 1220 S 0 0.0 0:02.16 init
>
>
> 2 root 20 0 0 0 0 S 0 0.0 0:00.01 kthreadd
>
>
> 3 root 20 0 0 0 0 S 0 0.0 0:07.21 ksoftirqd/0
>
>
> 6 root RT 0 0 0 0 S 0 0.0 0:00.00 migration/0
>
>
> 7 root RT 0 0 0 0 S 0 0.0 0:00.00 migration/1
>
>
> 9 root 20 0 0 0 0 S 0 0.0 0:03.00 ksoftirqd/1
>
>
> 11 root 0 -20 0 0 0 S 0 0.0 0:00.00 cpuset
>
>
> After i restart the postgresql service:
> total used free shared buffers cached
> Mem: 5977 4319 1657 0 139 3687
> -/+ buffers/cache: 492 5484
> Swap: 6075 1 6074
>
> top - 09:52:33 up 1 day, 18:46, 2 users, load average: 1.42, 1.33, 1.22
> Tasks: 164 total, 1 running, 162 sleeping, 0 stopped, 1 zombie
> Cpu(s): 1.3%us, 0.3%sy, 0.0%ni, 98.3%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
> Mem: 6120848k total, 4430204k used, 1690644k free, 143092k buffers
> Swap: 6221820k total, 1468k used, 6220352k free, 3776880k cached
>
> PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
>
>
> 907 root 20 0 52996 30m 6728 S 1 0.5 1:49.78 Xorg
>
>
> 1417 andreas 20 0 163m 39m 14m S 1 0.7 3:28.43 compiz
>
>
> 194 root 20 0 0 0 0 S 0 0.0 0:48.68 usb-storage
>
>
> 1447 root 20 0 5564 1000 712 S 0 0.0 0:23.28 udisks-daemon
>
>
> 1525 andreas 20 0 100m 15m 9548 S 0 0.3 5:11.66
> unity-panel-ser
>
> 1583 andreas 20 0 33552 16m 8984 S 0 0.3 0:03.21 applet.py
>
>
> 7039 andreas 20 0 44240 23m 6304 S 0 0.4 0:04.51 idle-python2.6
>
>
> 7088 andreas 20 0 93264 15m 10m S 0 0.3 0:03.28 gnome-terminal
>
>
> 1 root 20 0 3040 1780 1220 S 0 0.0 0:02.16 init
>
>
> 2 root 20 0 0 0 0 S 0 0.0 0:00.01 kthreadd
>
>
> 3 root 20 0 0 0 0 S 0 0.0 0:07.21 ksoftirqd/0
>
>
> 6 root RT 0 0 0 0 S 0 0.0 0:00.00 migration/0
>
>
> 7 root RT 0 0 0 0 S 0 0.0 0:00.00 migration/1
>
>
> 9 root 20 0 0 0 0 S 0 0.0 0:03.00 ksoftirqd/1
>
>
>
> 2011/10/11 Sandro Santilli <strk at keybit.net>
>
>> On Tue, Oct 11, 2011 at 05:12:24PM +0200, Andreas Forø Tollefsen wrote:
>> > Hi Sandro,
>> >
>> > What I find strange is that it stops processing at different years on my
>> > desktop and my laptop. While my desktop stops processing at 1980, my slower
>> > laptop goes on to 1991 before halting.
>> > I also tried with different postgresql.conf shared_buffers settings without
>> > making any difference.
>> > Therefore it is hard to reproduce this for a single year. I can easily
>> > process 1980 or 1991 if just running the script for that year.
>>
>> But you mentioned you had stopped the backend and restarted for each year?
>> Does the problem still occur if you avoid writing any table (could be an
>> I/O issue)?
>>
>> --strk;
>>
>> () Free GIS & Flash consultant/developer
>> /\ http://strk.keybit.net/services.html
>> _______________________________________________
>> postgis-users mailing list
>> postgis-users at postgis.refractions.net
>> http://postgis.refractions.net/mailman/listinfo/postgis-users
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.osgeo.org/pipermail/postgis-users/attachments/20111012/824bed18/attachment.html>
More information about the postgis-users mailing list