form optimiert, regex-konvertierung ok + test
This commit is contained in:
parent
517746b4e9
commit
b57c69abf0
6 changed files with 328 additions and 84 deletions
176
onlinesource.py
176
onlinesource.py
|
@ -5,12 +5,36 @@
|
|||
|
||||
from string import Template
|
||||
import requests, logging, html2text, re
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from trytond.model import ModelView, ModelSQL, fields, Unique, Check
|
||||
from trytond.transaction import Transaction
|
||||
from trytond.pool import Pool
|
||||
from trytond.pyson import Eval, Bool
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# decimal separators offered by the selection field 'rgxdecimal':
# (stored value, label)
sel_rgxdecimal = [(separator, separator) for separator in ('.', ',')]


# identifier types offered by the selection field 'rgxidtype':
# (stored value, label)
sel_rgxidtype = [
    ('isin', 'ISIN'), ('nsin', 'NSIN'), ('symbol', 'Symbol')]

# date formats offered by the selection field 'rgxdatefmt':
# (strptime format, label)
sel_rgxdatefmt = [
    ('%d.%m.%Y', 'dd.mm.yyyy'), ('%m/%d/%Y', 'mm/dd/yyyy'),
    ('%Y-%m-%d', 'yyyy-mm-dd')]

# field names handed to 'fields.depends' of the on_change handlers
fields_check = [
    'url', 'nsin', 'isin', 'symbol',
    'text', 'http_state']
|
||||
|
||||
|
||||
class OnlineSource(ModelSQL, ModelView):
|
||||
'Online Source'
|
||||
__name__ = 'investment.source'
|
||||
|
@ -21,14 +45,20 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
help='Removes HTML tags before the text is interpreted.')
|
||||
rgxdate = fields.Char(string='Date', required=True,
|
||||
help='Regex code to find the date in the downloaded HTML file.')
|
||||
rgxdatefmt = fields.Selection(string='Date format', required=True,
|
||||
selection=sel_rgxdatefmt)
|
||||
rgxrate = fields.Char(string='Rate', required=True,
|
||||
help='Regex code to find the rate in the downloaded HTML file.')
|
||||
rgxisin = fields.Char(string='ISIN',
|
||||
help='Regex code to find the ISIN in the downloaded HTML file.')
|
||||
rgxnsin = fields.Char(string='NSIN',
|
||||
help='Regex code to find the NSIN in the downloaded HTML file.')
|
||||
rgxsymbol = fields.Char(string='Symbol',
|
||||
help='Regex code to find the symbol in the downloaded HTML file.')
|
||||
rgxdecimal = fields.Selection(string='Decimal Separator', required=True,
|
||||
help='Decimal separator for converting the market value into a number.',
|
||||
selection=sel_rgxdecimal)
|
||||
rgxident = fields.Char(string='Identifier',
|
||||
help='Regex code to find the identifier in the downloaded HTML file.')
|
||||
rgxidtype = fields.Selection(string='ID-Type', selection=sel_rgxidtype,
|
||||
help='Type of identifier used to validate the result.',
|
||||
states={
|
||||
'required': Bool(Eval('rgxident', '')),
|
||||
}, depends=['rgxident'])
|
||||
|
||||
# field to test requests
|
||||
used_url = fields.Function(fields.Char(string='Used URL', readonly=True,
|
||||
|
@ -40,57 +70,84 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
'on_change_with_isin', setter='set_test_value')
|
||||
symbol = fields.Function(fields.Char(string='Symbol'),
|
||||
'on_change_with_symbol', setter='set_test_value')
|
||||
http_state = fields.Function(fields.Char(string='HTTP-State',
|
||||
readonly=True), 'on_change_with_http_state')
|
||||
text = fields.Function(fields.Text(string='Result',
|
||||
readonly=True), 'on_change_with_text')
|
||||
|
||||
@classmethod
def default_url(cls):
    """ default url: only the scheme, the rest is entered by the user
    """
    scheme_prefix = 'https://'
    return scheme_prefix
|
||||
|
||||
@classmethod
def default_rgxdate(cls):
    """ default regex for the date, finds: dd.mm.yyyy
    """
    # raw string, same value as the escaped literal
    return r'(\d{2}\.\d{2}\.\d{4})'
|
||||
|
||||
@classmethod
def default_rgxdatefmt(cls):
    """ default date format: dd.mm.yyyy (as strptime format)
    """
    day_month_year = '%d.%m.%Y'
    return day_month_year
|
||||
|
||||
@classmethod
def default_rgxrate(cls):
    """ default regex for the rate, finds: nn,nn
    """
    # raw string, same value as the escaped literal
    return r'(\d+,\d+)'
|
||||
|
||||
@classmethod
def default_rgxidtype(cls):
    """ default type of identifier: isin
    """
    id_type = 'isin'
    return id_type
|
||||
|
||||
@classmethod
def default_rgxdecimal(cls):
    """ default decimal separator: comma
    """
    comma = ','
    return comma
|
||||
|
||||
@classmethod
def default_nohtml(cls):
    """ default of 'nohtml': True
    """
    enabled = True
    return enabled
|
||||
|
||||
# one decorator is enough: 'fields_check' already contains
# 'nsin', 'isin', 'symbol' and 'text'
@fields.depends(*fields_check)
def on_change_nsin(self):
    """ run the request of the online source when 'nsin' was changed
    """
    self.call_online_source()
|
||||
|
||||
# one decorator is enough: 'fields_check' already contains
# 'nsin', 'isin', 'symbol' and 'text'
@fields.depends(*fields_check)
def on_change_isin(self):
    """ run the request of the online source when 'isin' was changed
    """
    self.call_online_source()
|
||||
|
||||
# one decorator is enough: 'fields_check' already contains
# 'nsin', 'isin', 'symbol' and 'text'
@fields.depends(*fields_check)
def on_change_symbol(self):
    """ run the request of the online source when 'symbol' was changed
    """
    self.call_online_source()
|
||||
|
||||
def on_change_with_http_state(self, name=None):
    """ getter of the function field 'http_state',
        always returns an empty string

        name: name of the field, not used (default None like
            the other on_change_with getters of this class)
    """
    return ''
|
||||
|
||||
def on_change_with_text(self, name=None):
    """ getter of the function field 'text',
        always returns an empty string; 'name' is not used
    """
    empty_text = ''
    return empty_text
|
||||
|
||||
def on_change_with_nsin(self, name=None):
    """ on_change_with handler for 'nsin',
        always returns an empty string; 'name' is not used
    """
    empty_text = ''
    return empty_text
|
||||
|
||||
def on_change_with_isin(self, name=None):
    """ on_change_with handler for 'isin',
        always returns an empty string; 'name' is not used
    """
    empty_text = ''
    return empty_text
|
||||
|
||||
def on_change_with_symbol(self, name=None):
    """ getter of the function field 'symbol',
        always returns an empty string; 'name' is not used
    """
    empty_text = ''
    return empty_text
|
||||
|
||||
@fields.depends('url', 'isin', 'nsin', 'symbol')
|
||||
|
@ -121,18 +178,20 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
isin = self.isin,
|
||||
nsin = self.nsin,
|
||||
symbol = self.symbol,
|
||||
debug=True,
|
||||
debug = True,
|
||||
)
|
||||
self.text = result.get('text', None)
|
||||
self.http_state = result.get('http_state', None)
|
||||
|
||||
def get_url_with_parameter(self, isin=None, nsin=None, symbol=None):
    """ generate the url of the request

        The placeholders $isin, $nsin and $symbol (syntax of
        string.Template) in 'self.url' are replaced by the given values,
        a value of None is inserted as empty string.

        isin, nsin, symbol: values to insert into the url
        returns: url with inserted values, None if no url is stored
        raises: KeyError/ValueError from Template.substitute if the url
            contains other or malformed placeholders
    """
    # without this guard Template(None).substitute() fails
    if not self.url:
        return None

    values = {'isin': isin, 'nsin': nsin, 'symbol': symbol}
    return Template(self.url).substitute({
        key: '' if value is None else value
        for key, value in values.items()})
|
||||
|
||||
@classmethod
|
||||
def update_rate(cls, asset):
|
||||
|
@ -140,34 +199,38 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
"""
|
||||
if asset.updtsource is None:
|
||||
return
|
||||
|
||||
rate_data = cls.read_from_website(asset.updtsource)
|
||||
|
||||
@classmethod
def cleanup_spaces(cls, text):
    """ remove multiple spaces

        Runs of blanks/tabs become one blank, a line break followed by
        further line breaks or carriage returns becomes one line break.

        text: text to clean up
        returns: cleaned text
    """
    # the former chained str.replace(' ', ' ') replaced a blank by a blank
    # and never collapsed anything
    text = re.sub(r'[ \t]+', ' ', text)
    return re.sub(r'\n[\r\n]+', '\n', text)

def get_regex_result(self, html_text, field_name):
    """ run regex on html-text, convert result

        html_text: text in which the regex is searched
        field_name: name of the field of this record which holds the
            regex code; the results of 'rgxrate' and 'rgxdate'
            are converted
        returns: Decimal for 'rgxrate', date for 'rgxdate', the found
            string for every other field; None if no regex is stored,
            nothing was found or the conversion failed
        raises: re.error if the stored regex code is invalid
    """
    from decimal import InvalidOperation

    # a field name not known to the record is handled like an empty regex
    rgxcode = getattr(self, field_name, None) or ''
    if not rgxcode:
        return None

    search_result = re.search(rgxcode, html_text)
    if search_result is None:
        return None

    # first capture group if the pattern has one, else the whole match
    if search_result.re.groups > 0:
        result = search_result.group(1)
    else:
        result = search_result.group(0)
    if result is None:
        # group 1 exists but took no part in the match, e.g. '(a)|b'
        return None

    if field_name == 'rgxrate':
        # the separator which is not the decimal separator is treated
        # as thousands separator and dropped
        thousands_sep = {',': '.', '.': ','}.get(self.rgxdecimal)
        if thousands_sep is None:
            # decimal separator not set or not supported
            return None
        number = result.replace(thousands_sep, '')
        number = number.replace(self.rgxdecimal, '.')
        try:
            return Decimal(number)
        except InvalidOperation:
            return None

    if field_name == 'rgxdate':
        try:
            return datetime.strptime(result, self.rgxdatefmt).date()
        except (ValueError, TypeError):
            # text does not fit the format / no format selected
            return None

    return result
|
||||
|
||||
@classmethod
|
||||
|
@ -176,6 +239,10 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
"""
|
||||
result = {}
|
||||
|
||||
if updtsource.url == 'https://':
|
||||
result['text'] = 'invalid url'
|
||||
return result
|
||||
|
||||
res1 = requests.get(
|
||||
updtsource.get_url_with_parameter(
|
||||
isin = isin,
|
||||
|
@ -185,24 +252,31 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
allow_redirects=True,
|
||||
timeout=5.0)
|
||||
|
||||
result['http_state'] = '%(code)d: %(msg)s' % {
|
||||
'code': res1.status_code,
|
||||
'msg': res1.reason,
|
||||
}
|
||||
|
||||
if res1.status_code in [200, 204]:
|
||||
html = cls.cleanup_spaces(res1.text)
|
||||
html = res1.text
|
||||
|
||||
# remove html-tags
|
||||
if updtsource.nohtml:
|
||||
o1 = html2text.HTML2Text()
|
||||
o1.ignore_links = True
|
||||
o1.ignore_tables = True
|
||||
o1.bypass_tables = False
|
||||
o1.single_line_break = True
|
||||
o1.body_width = 0
|
||||
html = o1.handle(html)
|
||||
del o1
|
||||
|
||||
if debug:
|
||||
result['text'] = html
|
||||
|
||||
result['rate'] = cls.get_regex_result(html, updtsource.rgxrate)
|
||||
result['date'] = cls.get_regex_result(html, updtsource.rgxdate)
|
||||
result['isin'] = cls.get_regex_result(html, updtsource.rgxisin)
|
||||
result['nsin'] = cls.get_regex_result(html, updtsource.rgxnsin)
|
||||
result['symbol'] = cls.get_regex_result(html, updtsource.rgxsymbol)
|
||||
result['rate'] = updtsource.get_regex_result(html, 'rgxrate')
|
||||
result['date'] = updtsource.get_regex_result(html, 'rgxdate')
|
||||
result['code'] = updtsource.get_regex_result(html, 'rgxcode')
|
||||
|
||||
print('\n## result:', result)
|
||||
else :
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue