Skip to content
This repository was archived by the owner on Jun 10, 2024. It is now read-only.

Refactor utils connect db #769

Merged
merged 11 commits into from
Mar 7, 2018
202 changes: 113 additions & 89 deletions pyspider/database/__init__.py
Original file line number Diff line number Diff line change
@@ -61,83 +61,17 @@ def _connect_database(url): # NOQA
'type should be one of ["taskdb", "projectdb", "resultdb"]', dbtype)

if engine == 'mysql':
parames = {}
if parsed.username:
parames['user'] = parsed.username
if parsed.password:
parames['passwd'] = parsed.password
if parsed.hostname:
parames['host'] = parsed.hostname
if parsed.port:
parames['port'] = parsed.port
if parsed.path.strip('/'):
parames['database'] = parsed.path.strip('/')
return _connect_mysql(parsed,dbtype)

if dbtype == 'taskdb':
from .mysql.taskdb import TaskDB
return TaskDB(**parames)
elif dbtype == 'projectdb':
from .mysql.projectdb import ProjectDB
return ProjectDB(**parames)
elif dbtype == 'resultdb':
from .mysql.resultdb import ResultDB
return ResultDB(**parames)
else:
raise LookupError
elif engine == 'sqlite':
if parsed.path.startswith('//'):
path = '/' + parsed.path.strip('/')
elif parsed.path.startswith('/'):
path = './' + parsed.path.strip('/')
elif not parsed.path:
path = ':memory:'
else:
raise Exception('error path: %s' % parsed.path)

if dbtype == 'taskdb':
from .sqlite.taskdb import TaskDB
return TaskDB(path)
elif dbtype == 'projectdb':
from .sqlite.projectdb import ProjectDB
return ProjectDB(path)
elif dbtype == 'resultdb':
from .sqlite.resultdb import ResultDB
return ResultDB(path)
else:
raise LookupError
return _connect_sqlite(parsed,dbtype)
elif engine == 'mongodb':
url = url.replace(parsed.scheme, 'mongodb')
parames = {}
if parsed.path.strip('/'):
parames['database'] = parsed.path.strip('/')
return _connect_mongodb(parsed,dbtype,url)

if dbtype == 'taskdb':
from .mongodb.taskdb import TaskDB
return TaskDB(url, **parames)
elif dbtype == 'projectdb':
from .mongodb.projectdb import ProjectDB
return ProjectDB(url, **parames)
elif dbtype == 'resultdb':
from .mongodb.resultdb import ResultDB
return ResultDB(url, **parames)
else:
raise LookupError
elif engine == 'sqlalchemy':
if not other_scheme:
raise Exception('wrong scheme format: %s' % parsed.scheme)
url = url.replace(parsed.scheme, other_scheme)
return _connect_sqlalchemy(parsed, dbtype, url, other_scheme)


if dbtype == 'taskdb':
from .sqlalchemy.taskdb import TaskDB
return TaskDB(url)
elif dbtype == 'projectdb':
from .sqlalchemy.projectdb import ProjectDB
return ProjectDB(url)
elif dbtype == 'resultdb':
from .sqlalchemy.resultdb import ResultDB
return ResultDB(url)
else:
raise LookupError
elif engine == 'redis':
if dbtype == 'taskdb':
from .redis.taskdb import TaskDB
@@ -153,24 +87,114 @@ def _connect_database(url): # NOQA
else:
raise LookupError('not supported dbtype: %s', dbtype)
elif engine == 'elasticsearch' or engine == 'es':
# in python 2.6, for a url like "http://host/?query", the query is not split out of the path
if parsed.path.startswith('/?'):
index = parse_qs(parsed.path[2:])
else:
index = parse_qs(parsed.query)
if 'index' in index and index['index']:
index = index['index'][0]
else:
index = 'pyspider'
return _connect_elasticsearch(parsed, dbtype)

if dbtype == 'projectdb':
from .elasticsearch.projectdb import ProjectDB
return ProjectDB([parsed.netloc], index=index)
elif dbtype == 'resultdb':
from .elasticsearch.resultdb import ResultDB
return ResultDB([parsed.netloc], index=index)
elif dbtype == 'taskdb':
from .elasticsearch.taskdb import TaskDB
return TaskDB([parsed.netloc], index=index)
else:
raise Exception('unknown engine: %s' % engine)


def _connect_mysql(parsed, dbtype):
    """Create the MySQL-backed database object selected by ``dbtype``.

    :param parsed: parsed connection URL; its ``username``, ``password``,
        ``hostname``, ``port`` and ``path`` attributes are read.
    :param dbtype: ``'taskdb'``, ``'projectdb'`` or ``'resultdb'``.
    :return: a ``TaskDB``, ``ProjectDB`` or ``ResultDB`` instance from the
        ``.mysql`` subpackage.
    :raises LookupError: if ``dbtype`` is not one of the values above.
    """
    # Only URL components that are actually present are forwarded as
    # keyword arguments; absent ones are simply not passed.
    parames = {}
    if parsed.username:
        parames['user'] = parsed.username
    if parsed.password:
        parames['passwd'] = parsed.password
    if parsed.hostname:
        parames['host'] = parsed.hostname
    if parsed.port:
        parames['port'] = parsed.port
    database = parsed.path.strip('/')
    if database:
        parames['database'] = database

    # Each backend module is imported inside its own branch, so only the
    # selected one is ever imported.
    if dbtype == 'taskdb':
        from .mysql.taskdb import TaskDB
        return TaskDB(**parames)
    elif dbtype == 'projectdb':
        from .mysql.projectdb import ProjectDB
        return ProjectDB(**parames)
    elif dbtype == 'resultdb':
        from .mysql.resultdb import ResultDB
        return ResultDB(**parames)
    else:
        raise LookupError('not supported dbtype: %s' % dbtype)


def _connect_sqlite(parsed, dbtype):
    """Create the SQLite-backed database object selected by ``dbtype``.

    The database location is derived from ``parsed.path``:

    * a path starting with ``//`` becomes ``'/'`` plus the path with its
      leading and trailing slashes removed;
    * a path starting with a single ``/`` becomes ``'./'`` plus the path
      with its leading and trailing slashes removed;
    * an empty path becomes ``':memory:'``.

    :param parsed: parsed connection URL; only ``path`` is read.
    :param dbtype: ``'taskdb'``, ``'projectdb'`` or ``'resultdb'``.
    :return: a ``TaskDB``, ``ProjectDB`` or ``ResultDB`` instance from the
        ``.sqlite`` subpackage, constructed with the derived path.
    :raises Exception: if the path is non-empty and does not start with ``/``.
    :raises LookupError: if ``dbtype`` is not one of the values above.
    """
    raw_path = parsed.path
    if raw_path.startswith('//'):
        path = '/' + raw_path.strip('/')
    elif raw_path.startswith('/'):
        path = './' + raw_path.strip('/')
    elif not raw_path:
        path = ':memory:'
    else:
        raise Exception('error path: %s' % raw_path)

    # Each backend module is imported inside its own branch, so only the
    # selected one is ever imported.
    if dbtype == 'taskdb':
        from .sqlite.taskdb import TaskDB
        return TaskDB(path)
    elif dbtype == 'projectdb':
        from .sqlite.projectdb import ProjectDB
        return ProjectDB(path)
    elif dbtype == 'resultdb':
        from .sqlite.resultdb import ResultDB
        return ResultDB(path)
    else:
        raise LookupError('not supported dbtype: %s' % dbtype)


def _connect_mongodb(parsed, dbtype, url):
    """Create the MongoDB-backed database object selected by ``dbtype``.

    :param parsed: parsed form of ``url``; ``scheme`` and ``path`` are read.
    :param dbtype: ``'taskdb'``, ``'projectdb'`` or ``'resultdb'``.
    :param url: connection URL string; the text of ``parsed.scheme`` in it
        is rewritten to ``mongodb`` before being handed to the backend.
    :return: a ``TaskDB``, ``ProjectDB`` or ``ResultDB`` instance from the
        ``.mongodb`` subpackage.
    :raises LookupError: if ``dbtype`` is not one of the values above.
    """
    # Replace only the first occurrence: an unbounded replace would also
    # rewrite the same text if it happened to appear later in the URL
    # (credentials, host, path, ...).
    url = url.replace(parsed.scheme, 'mongodb', 1)

    parames = {}
    database = parsed.path.strip('/')
    if database:
        parames['database'] = database

    # Each backend module is imported inside its own branch, so only the
    # selected one is ever imported.
    if dbtype == 'taskdb':
        from .mongodb.taskdb import TaskDB
        return TaskDB(url, **parames)
    elif dbtype == 'projectdb':
        from .mongodb.projectdb import ProjectDB
        return ProjectDB(url, **parames)
    elif dbtype == 'resultdb':
        from .mongodb.resultdb import ResultDB
        return ResultDB(url, **parames)
    else:
        raise LookupError('not supported dbtype: %s' % dbtype)


def _connect_sqlalchemy(parsed, dbtype, url, other_scheme):
    """Create the SQLAlchemy-backed database object selected by ``dbtype``.

    :param parsed: parsed form of ``url``; only ``scheme`` is read.
    :param dbtype: ``'taskdb'``, ``'projectdb'`` or ``'resultdb'``.
    :param url: connection URL string; the text of ``parsed.scheme`` in it
        is rewritten to ``other_scheme`` before being handed to the backend.
    :param other_scheme: scheme to substitute into ``url``; must be truthy.
    :return: a ``TaskDB``, ``ProjectDB`` or ``ResultDB`` instance from the
        ``.sqlalchemy`` subpackage.
    :raises Exception: if ``other_scheme`` is empty or ``None``.
    :raises LookupError: if ``dbtype`` is not one of the values above.
    """
    if not other_scheme:
        raise Exception('wrong scheme format: %s' % parsed.scheme)

    # Replace only the first occurrence: an unbounded replace would also
    # rewrite the same text if it happened to appear later in the URL.
    url = url.replace(parsed.scheme, other_scheme, 1)

    # Each backend module is imported inside its own branch, so only the
    # selected one is ever imported.
    if dbtype == 'taskdb':
        from .sqlalchemy.taskdb import TaskDB
        return TaskDB(url)
    elif dbtype == 'projectdb':
        from .sqlalchemy.projectdb import ProjectDB
        return ProjectDB(url)
    elif dbtype == 'resultdb':
        from .sqlalchemy.resultdb import ResultDB
        return ResultDB(url)
    else:
        raise LookupError('not supported dbtype: %s' % dbtype)


def _connect_elasticsearch(parsed, dbtype):
    """Create the Elasticsearch-backed database object selected by ``dbtype``.

    The index name is taken from the first ``index`` value in the URL's
    query string and falls back to ``'pyspider'`` when none is given.

    :param parsed: parsed connection URL; ``path``, ``query`` and ``netloc``
        are read.
    :param dbtype: ``'taskdb'``, ``'projectdb'`` or ``'resultdb'``.
    :return: a ``TaskDB``, ``ProjectDB`` or ``ResultDB`` instance from the
        ``.elasticsearch`` subpackage, constructed with ``[parsed.netloc]``
        and the resolved index name.
    :raises LookupError: if ``dbtype`` is not one of the values above.
    """
    # In Python 2.6, for a URL like "http://host/?query", the query is not
    # split out of the path, so it has to be read from the path instead.
    if parsed.path.startswith('/?'):
        query = parse_qs(parsed.path[2:])
    else:
        query = parse_qs(parsed.query)
    # parse_qs maps each key to a list of values; use the first one.
    index = (query.get('index') or ['pyspider'])[0]

    # Each backend module is imported inside its own branch, so only the
    # selected one is ever imported.
    if dbtype == 'projectdb':
        from .elasticsearch.projectdb import ProjectDB
        return ProjectDB([parsed.netloc], index=index)
    elif dbtype == 'resultdb':
        from .elasticsearch.resultdb import ResultDB
        return ResultDB([parsed.netloc], index=index)
    elif dbtype == 'taskdb':
        from .elasticsearch.taskdb import TaskDB
        return TaskDB([parsed.netloc], index=index)
    else:
        # Previously this fell through and returned None; fail loudly
        # instead, like the other _connect_* helpers do.
        raise LookupError('not supported dbtype: %s' % dbtype)
58 changes: 34 additions & 24 deletions pyspider/libs/utils.py
Original file line number Diff line number Diff line change
@@ -83,6 +83,7 @@ def format_date(date, gmt_offset=0, relative=True, shorter=False, full_format=Fa

From tornado
"""

if not date:
return '-'
if isinstance(date, float) or isinstance(date, int):
@@ -106,30 +107,12 @@ def format_date(date, gmt_offset=0, relative=True, shorter=False, full_format=Fa

format = None
if not full_format:
if relative and days == 0:
if seconds < 50:
return ("1 second ago" if seconds <= 1 else
"%(seconds)d seconds ago") % {"seconds": seconds}

if seconds < 50 * 60:
minutes = round(seconds / 60.0)
return ("1 minute ago" if minutes <= 1 else
"%(minutes)d minutes ago") % {"minutes": minutes}

hours = round(seconds / (60.0 * 60))
return ("1 hour ago" if hours <= 1 else
"%(hours)d hours ago") % {"hours": hours}

if days == 0:
format = "%(time)s"
elif days == 1 and local_date.day == local_yesterday.day and \
relative:
format = "yesterday" if shorter else "yesterday at %(time)s"
elif days < 5:
format = "%(weekday)s" if shorter else "%(weekday)s at %(time)s"
elif days < 334: # 11mo, since confusing for same month last year
format = "%(month)s-%(day)s" if shorter else \
"%(month)s-%(day)s at %(time)s"
ret_, fff_format = fix_full_format(days, seconds, relative, shorter, local_date, local_yesterday)
format = fff_format
if ret_:
return format
else:
format = format

if format is None:
format = "%(month_name)s %(day)s, %(year)s" if shorter else \
@@ -147,6 +130,33 @@ def format_date(date, gmt_offset=0, relative=True, shorter=False, full_format=Fa
}


def fix_full_format(days, seconds, relative, shorter, local_date, local_yesterday):
    """Choose the non-full rendering for a date that is ``days``/``seconds`` old.

    :param days: age of the date in whole days.
    :param seconds: age of the date in seconds; only consulted when
        ``relative`` is true and ``days`` is 0.
    :param relative: allow relative wording ("... ago", "yesterday").
    :param shorter: pick the template variant without the time of day.
    :param local_date: object with a ``day`` attribute for the date.
    :param local_yesterday: object with a ``day`` attribute for yesterday.
    :return: a ``(done, value)`` tuple. When ``done`` is true, ``value`` is a
        finished "N units ago" string. Otherwise ``value`` is a ``%``-style
        template that still needs filling in, or ``None`` when no template
        applies (``days`` is 334 or more).
    """
    if relative and days == 0:
        if seconds < 50:
            if seconds <= 1:
                return True, "1 second ago"
            return True, "%(seconds)d seconds ago" % {"seconds": seconds}

        if seconds < 50 * 60:
            minutes = round(seconds / 60.0)
            if minutes <= 1:
                return True, "1 minute ago"
            return True, "%(minutes)d minutes ago" % {"minutes": minutes}

        hours = round(seconds / (60.0 * 60))
        if hours <= 1:
            return True, "1 hour ago"
        return True, "%(hours)d hours ago" % {"hours": hours}

    # ``fmt`` rather than ``format`` so the builtin is not shadowed.
    fmt = None
    if days == 0:
        fmt = "%(time)s"
    elif days == 1 and local_date.day == local_yesterday.day and relative:
        fmt = "yesterday" if shorter else "yesterday at %(time)s"
    elif days < 5:
        fmt = "%(weekday)s" if shorter else "%(weekday)s at %(time)s"
    elif days < 334:  # 11mo, since confusing for same month last year
        fmt = "%(month)s-%(day)s" if shorter else \
            "%(month)s-%(day)s at %(time)s"
    return False, fmt

class TimeoutError(Exception):
    """Timeout exception type defined by this module.

    It derives directly from ``Exception`` and adds no behaviour of its own.
    NOTE(review): presumably raised by timeout helpers elsewhere in this
    module -- confirm against the rest of the file.
    """