regex korrigiert

This commit is contained in:
Frederik Jaeckel 2022-11-21 16:23:26 +01:00
parent 4b9c25ec46
commit 517746b4e9

View file

@ -22,7 +22,7 @@ class OnlineSource(ModelSQL, ModelView):
rgxdate = fields.Char(string='Date', required=True,
help='Regex code to find the date in the downloaded HTML file.')
rgxrate = fields.Char(string='Rate', required=True,
help='Regex code to find the drte in the downloaded HTML file.')
help='Regex code to find the rate in the downloaded HTML file.')
rgxisin = fields.Char(string='ISIN',
help='Regex code to find the ISIN in the downloaded HTML file.')
rgxnsin = fields.Char(string='NSIN',
@ -30,7 +30,6 @@ class OnlineSource(ModelSQL, ModelView):
rgxsymbol = fields.Char(string='Symbol',
help='Regex code to find the symbol in the downloaded HTML file.')
# field to test requests
used_url = fields.Function(fields.Char(string='Used URL', readonly=True,
help='This URL is used to retrieve the HTML file.'),
@ -48,7 +47,7 @@ class OnlineSource(ModelSQL, ModelView):
def default_rgxdate(cls):
""" code to find date: dd.mm.yyyy
"""
return '(\d{2}\.\d{2}\.\d{4})'
return '(\\d{2}\\.\\d{2}\\.\\d{4})'
@classmethod
def default_nohtml(cls):
@ -56,20 +55,6 @@ class OnlineSource(ModelSQL, ModelView):
"""
return True
def call_online_source(self):
    """ Query the online source with the values currently set on this record.

    Calls ``read_from_website`` of the 'investment.source' model, passing
    this record together with its ``isin``, ``nsin`` and ``symbol`` values
    and ``debug=True``. The 'text' entry of the returned mapping
    (None if the key is missing) is stored in ``self.text``.
    """
    source_model = Pool().get('investment.source')
    fetch = source_model.read_from_website

    # identifiers are forwarded as keyword arguments
    identifiers = {
        'isin': self.isin,
        'nsin': self.nsin,
        'symbol': self.symbol,
    }
    response = fetch(self, **identifiers, debug=True)
    self.text = response.get('text')
@fields.depends('nsin', 'isin', 'symbol', 'text')
def on_change_nsin(self):
""" run request
@ -108,15 +93,6 @@ class OnlineSource(ModelSQL, ModelView):
"""
return ''
def get_url_with_parameter(self, isin=None, nsin=None, symbol=None):
    """ Build the request URL from the template stored in ``self.url``.

    The placeholders 'isin', 'nsin' and 'symbol' are filled in with
    ``Template(...).substitute``; an argument that is None is
    substituted as an empty string.
    """
    given = (('isin', isin), ('nsin', nsin), ('symbol', symbol))
    # None must not end up as the text 'None' in the URL
    mapping = {
        key: ('' if value is None else value)
        for key, value in given
    }
    url_template = Template(self.url)
    return url_template.substitute(mapping)
@fields.depends('url', 'isin', 'nsin', 'symbol')
def on_change_with_used_url(self, name=None):
""" get url for testing
@ -134,9 +110,33 @@ class OnlineSource(ModelSQL, ModelView):
"""
pass
def call_online_source(self):
    """ Query the online source with the values currently set on this
    record, for testing parameters.

    Calls ``read_from_website`` of the 'investment.source' model, passing
    this record together with its ``isin``, ``nsin`` and ``symbol`` values
    and ``debug=True``. The 'text' entry of the returned mapping
    (None if the key is missing) is stored in ``self.text``.
    """
    source_model = Pool().get('investment.source')
    fetch = source_model.read_from_website

    # identifiers are forwarded as keyword arguments
    identifiers = {
        'isin': self.isin,
        'nsin': self.nsin,
        'symbol': self.symbol,
    }
    response = fetch(self, **identifiers, debug=True)
    self.text = response.get('text')
def get_url_with_parameter(self, isin=None, nsin=None, symbol=None):
    """ Build the request URL from the template stored in ``self.url``.

    The placeholders 'isin', 'nsin' and 'symbol' are filled in with
    ``Template(...).substitute``; an argument that is None is
    substituted as an empty string.
    """
    given = (('isin', isin), ('nsin', nsin), ('symbol', symbol))
    # None must not end up as the text 'None' in the URL
    mapping = {
        key: ('' if value is None else value)
        for key, value in given
    }
    url_template = Template(self.url)
    return url_template.substitute(mapping)
@classmethod
def update_rate(cls, asset):
""" read data from inet
""" read data from inet, write result to rates of asset
"""
if asset.updtsource is None:
return
@ -163,7 +163,7 @@ class OnlineSource(ModelSQL, ModelView):
if len(rgxcode) == 0:
print('-- get_regex_result: stop 1')
return None
result = re.compile(rgxcode).match(html_text)
result = re.compile(rgxcode).search(html_text)
if result is None:
print('-- get_regex_result: stop 2')
return None
@ -185,10 +185,10 @@ class OnlineSource(ModelSQL, ModelView):
allow_redirects=True,
timeout=5.0)
if res1.status_code in [200, 204, 404]:
if res1.status_code in [200, 204]:
html = cls.cleanup_spaces(res1.text)
# remove html
# remove html-tags
if updtsource.nohtml:
o1 = html2text.HTML2Text()
o1.ignore_links = True
@ -206,7 +206,11 @@ class OnlineSource(ModelSQL, ModelView):
print('\n## result:', result)
else :
logger.error('read_from_website: %s' % res1.text)
logger.error('read_from_website: %(code)s, url: %(url)s, redirects: [%(redirects)s]' % {
'code': res1.status_code,
'url': res1.url,
'redirects': ', '.join([x.url for x in res1.history]),
})
if debug:
result['text'] = res1.text
return result