form optimiert, regex-konvertierung ok + test

This commit is contained in:
Frederik Jaeckel 2022-11-21 23:12:26 +01:00
parent 517746b4e9
commit b57c69abf0
6 changed files with 328 additions and 84 deletions

View file

@ -5,12 +5,36 @@
from string import Template
import requests, logging, html2text, re
from datetime import datetime
from decimal import Decimal
from trytond.model import ModelView, ModelSQL, fields, Unique, Check
from trytond.transaction import Transaction
from trytond.pool import Pool
from trytond.pyson import Eval, Bool
logger = logging.getLogger(__name__)
# Choices for the decimal separator used when a matched rate string is
# converted into a number; each entry is (stored value, label).
sel_rgxdecimal = [
('.', '.'),
(',', ','),
]
# Choices for the kind of identifier the identifier regex is expected to find.
sel_rgxidtype = [
('isin', 'ISIN'),
('nsin', 'NSIN'),
('symbol', 'Symbol'),
]
# Choices for the date format: (strptime pattern, label shown to the user).
sel_rgxdatefmt = [
('%d.%m.%Y', 'dd.mm.yyyy'),
('%m/%d/%Y', 'mm/dd/yyyy'),
('%Y-%m-%d', 'yyyy-mm-dd'),
]
# Field names passed to @fields.depends on the on_change_* handlers.
fields_check = ['url', 'nsin', 'isin', 'symbol', 'text', 'http_state']
class OnlineSource(ModelSQL, ModelView):
'Online Source'
__name__ = 'investment.source'
@ -21,14 +45,20 @@ class OnlineSource(ModelSQL, ModelView):
help='Removes HTML tags before the text is interpreted.')
# Regex that locates the date in the downloaded text (mandatory).
rgxdate = fields.Char(string='Date', required=True,
help='Regex code to find the date in the downloaded HTML file.')
# strptime pattern used to parse the matched date; one of sel_rgxdatefmt.
rgxdatefmt = fields.Selection(string='Date format', required=True,
selection=sel_rgxdatefmt)
# Regex that locates the rate in the downloaded text (mandatory).
rgxrate = fields.Char(string='Rate', required=True,
help='Regex code to find the rate in the downloaded HTML file.')
# Optional regexes for the individual identifiers.
rgxisin = fields.Char(string='ISIN',
help='Regex code to find the ISIN in the downloaded HTML file.')
rgxnsin = fields.Char(string='NSIN',
help='Regex code to find the NSIN in the downloaded HTML file.')
rgxsymbol = fields.Char(string='Symbol',
help='Regex code to find the symbol in the downloaded HTML file.')
# Decimal separator of the matched rate; one of sel_rgxdecimal.
rgxdecimal = fields.Selection(string='Decimal Separator', required=True,
help='Decimal separator for converting the market value into a number.',
selection=sel_rgxdecimal)
# Optional regex for a generic identifier.
rgxident = fields.Char(string='Identifier',
help='Regex code to find the identifier in the downloaded HTML file.')
# Kind of identifier found by 'rgxident'; becomes required as soon as
# 'rgxident' is non-empty.
rgxidtype = fields.Selection(string='ID-Type', selection=sel_rgxidtype,
help='Type of identifier used to validate the result.',
states={
'required': Bool(Eval('rgxident', '')),
}, depends=['rgxident'])
# field to test requests
used_url = fields.Function(fields.Char(string='Used URL', readonly=True,
@ -40,57 +70,84 @@ class OnlineSource(ModelSQL, ModelView):
'on_change_with_isin', setter='set_test_value')
# Function fields used for testing a request: 'symbol' is computed by
# on_change_with_symbol and written through set_test_value, while
# 'http_state' and 'text' are read-only results.
symbol = fields.Function(fields.Char(string='Symbol'),
'on_change_with_symbol', setter='set_test_value')
http_state = fields.Function(fields.Char(string='HTTP-State',
readonly=True), 'on_change_with_http_state')
text = fields.Function(fields.Text(string='Result',
readonly=True), 'on_change_with_text')
@classmethod
def default_url(cls):
    """ Default for 'url': only the scheme prefix.
    """
    return 'https://'
@classmethod
def default_rgxdate(cls):
    """ Default date regex: one capture group matching dd.mm.yyyy.
    """
    return '(\\d{2}\\.\\d{2}\\.\\d{4})'
@classmethod
def default_rgxdatefmt(cls):
    """ Default date format: the strptime pattern for dd.mm.yyyy.
    """
    return '%d.%m.%Y'
@classmethod
def default_rgxrate(cls):
    """ Default rate regex: digits, a comma, digits (nn,nn).
    """
    return '(\\d+,\\d+)'
@classmethod
def default_rgxidtype(cls):
    """ Default identifier type: 'isin'.
    """
    return 'isin'
@classmethod
def default_rgxdecimal(cls):
    """ Default decimal separator: the comma.
    """
    return ','
@classmethod
def default_nohtml(cls):
    """ Default for 'nohtml': enabled (True).
    """
    return True
@fields.depends(*fields_check)
def on_change_nsin(self):
    """ Call call_online_source() when the NSIN test value changes.

    Only the 'fields_check' decorator is kept: the names of the former
    second decorator ('nsin', 'isin', 'symbol', 'text') are all part
    of 'fields_check'.
    """
    self.call_online_source()
@fields.depends(*fields_check)
def on_change_isin(self):
    """ Call call_online_source() when the ISIN test value changes.

    Only the 'fields_check' decorator is kept: the names of the former
    second decorator ('nsin', 'isin', 'symbol', 'text') are all part
    of 'fields_check'.
    """
    self.call_online_source()
@fields.depends(*fields_check)
def on_change_symbol(self):
    """ Call call_online_source() when the symbol test value changes.

    Only the 'fields_check' decorator is kept: the names of the former
    second decorator ('nsin', 'isin', 'symbol', 'text') are all part
    of 'fields_check'.
    """
    self.call_online_source()
def on_change_with_http_state(self, name=None):
    """ Getter of the function field 'http_state'.

    Always returns an empty string; 'name' is accepted but not used.
    """
    return ''
def on_change_with_text(self, name=None):
    """ Getter of the function field 'text'.

    Always returns an empty string; 'name' is accepted but not used.
    """
    return ''
def on_change_with_nsin(self, name=None):
    """ Getter of the function field 'nsin'.

    Always returns an empty string; 'name' is accepted but not used.
    """
    return ''
def on_change_with_isin(self, name=None):
    """ Getter of the function field 'isin'.

    Always returns an empty string; 'name' is accepted but not used.
    """
    return ''
def on_change_with_symbol(self, name=None):
    """ Getter of the function field 'symbol'.

    Always returns an empty string; 'name' is accepted but not used.
    """
    return ''
@fields.depends('url', 'isin', 'nsin', 'symbol')
@ -121,18 +178,20 @@ class OnlineSource(ModelSQL, ModelView):
isin = self.isin,
nsin = self.nsin,
symbol = self.symbol,
debug=True,
debug = True,
)
self.text = result.get('text', None)
self.http_state = result.get('http_state', None)
def get_url_with_parameter(self, isin=None, nsin=None, symbol=None):
    """ Build the request URL from the template stored in 'url'.

    The placeholders $isin, $nsin and $symbol (or ${isin}, ...) are
    replaced by the given values; a value of None is inserted as an
    empty string.

    Returns the resulting URL, or None if 'url' is empty.
    Raises KeyError if the template contains any other placeholder and
    ValueError if a placeholder is malformed (string.Template.substitute).
    """
    # without a template there is nothing to substitute
    if not self.url:
        return None

    values = {'isin': isin, 'nsin': nsin, 'symbol': symbol}
    return Template(self.url).substitute({
        key: '' if value is None else value
        for key, value in values.items()
        })
@classmethod
def update_rate(cls, asset):
@ -140,34 +199,38 @@ class OnlineSource(ModelSQL, ModelView):
"""
if asset.updtsource is None:
return
rate_data = cls.read_from_website(asset.updtsource)
@classmethod
def cleanup_spaces(cls, text):
    """ Collapse repeated whitespace in 'text'.

    Tabs are turned into spaces, runs of spaces are reduced to a single
    space and runs of newlines to a single newline. The replacements
    are repeated until the length of the text stops changing.
    """
    previous_len = -1
    while previous_len != len(text):
        previous_len = len(text)
        # two spaces -> one; longer runs shrink over several passes
        text = text.replace('\t', ' ').replace('  ', ' ')
        text = text.replace('\n\r', '\n').replace('\n\n', '\n')
    return text

def get_regex_result(self, html_text, field_name):
    """ Run the regex stored in 'field_name' on 'html_text', convert the hit.

    html_text: text to search.
    field_name: name of the attribute of this record holding the regex.

    The first capture group is used; a regex without groups yields the
    whole match. For 'rgxrate' the hit is converted to Decimal using
    'rgxdecimal' as decimal separator (the other one of ',' and '.' is
    dropped as thousands separator). For 'rgxdate' the hit is parsed
    with 'rgxdatefmt' into a date. Any other field returns the string.

    Returns None if the regex is empty, nothing matches or the
    conversion fails. An invalid regex raises re.error.
    """
    from decimal import InvalidOperation

    rgxcode = getattr(self, field_name) or ''
    if not rgxcode:
        return None

    search_result = re.search(rgxcode, html_text)
    if search_result is None:
        return None

    # prefer the first capture group, fall back to the whole match
    try:
        result = search_result.group(1)
    except IndexError:
        result = search_result.group(0)
    if result is None:
        # optional group that did not take part in the match
        return None

    if field_name == 'rgxrate':
        decimal_sep = self.rgxdecimal
        if decimal_sep not in (',', '.'):
            # unknown separator: the number cannot be normalized
            return None
        thousands_sep = '.' if decimal_sep == ',' else ','
        number = result.replace(thousands_sep, '').replace(decimal_sep, '.')
        try:
            return Decimal(number)
        except InvalidOperation:
            return None

    if field_name == 'rgxdate':
        try:
            return datetime.strptime(result, self.rgxdatefmt).date()
        except (TypeError, ValueError):
            return None

    return result
@classmethod
@ -176,6 +239,10 @@ class OnlineSource(ModelSQL, ModelView):
"""
result = {}
if updtsource.url == 'https://':
result['text'] = 'invalid url'
return result
res1 = requests.get(
updtsource.get_url_with_parameter(
isin = isin,
@ -185,24 +252,31 @@ class OnlineSource(ModelSQL, ModelView):
allow_redirects=True,
timeout=5.0)
result['http_state'] = '%(code)d: %(msg)s' % {
'code': res1.status_code,
'msg': res1.reason,
}
if res1.status_code in [200, 204]:
html = cls.cleanup_spaces(res1.text)
html = res1.text
# remove html-tags
if updtsource.nohtml:
o1 = html2text.HTML2Text()
o1.ignore_links = True
o1.ignore_tables = True
o1.bypass_tables = False
o1.single_line_break = True
o1.body_width = 0
html = o1.handle(html)
del o1
if debug:
result['text'] = html
result['rate'] = cls.get_regex_result(html, updtsource.rgxrate)
result['date'] = cls.get_regex_result(html, updtsource.rgxdate)
result['isin'] = cls.get_regex_result(html, updtsource.rgxisin)
result['nsin'] = cls.get_regex_result(html, updtsource.rgxnsin)
result['symbol'] = cls.get_regex_result(html, updtsource.rgxsymbol)
result['rate'] = updtsource.get_regex_result(html, 'rgxrate')
result['date'] = updtsource.get_regex_result(html, 'rgxdate')
result['code'] = updtsource.get_regex_result(html, 'rgxcode')
print('\n## result:', result)
else :