From 517746b4e94638fc0882d7d3d969ec4dfb62e424 Mon Sep 17 00:00:00 2001 From: Frederik Jaeckel Date: Mon, 21 Nov 2022 16:23:26 +0100 Subject: [PATCH] regex korrigiert --- onlinesource.py | 66 ++++++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/onlinesource.py b/onlinesource.py index dcebdb3..797f892 100644 --- a/onlinesource.py +++ b/onlinesource.py @@ -22,7 +22,7 @@ class OnlineSource(ModelSQL, ModelView): rgxdate = fields.Char(string='Date', required=True, help='Regex code to find the date in the downloaded HTML file.') rgxrate = fields.Char(string='Rate', required=True, - help='Regex code to find the drte in the downloaded HTML file.') + help='Regex code to find the rate in the downloaded HTML file.') rgxisin = fields.Char(string='ISIN', help='Regex code to find the ISIN in the downloaded HTML file.') rgxnsin = fields.Char(string='NSIN', @@ -30,7 +30,6 @@ class OnlineSource(ModelSQL, ModelView): rgxsymbol = fields.Char(string='Symbol', help='Regex code to find the symbol in the downloaded HTML file.') - # field to test requests used_url = fields.Function(fields.Char(string='Used URL', readonly=True, help='This URL is used to retrieve the HTML file.'), @@ -48,7 +47,7 @@ class OnlineSource(ModelSQL, ModelView): def default_rgxdate(cls): """ code to find date: dd.mm.yyyy """ - return '(\d{2}\.\d{2}\.\d{4})' + return '(\\d{2}\\.\\d{2}\\.\\d{4})' @classmethod def default_nohtml(cls): @@ -56,20 +55,6 @@ class OnlineSource(ModelSQL, ModelView): """ return True - def call_online_source(self): - """ use updated values to call online-source - """ - OSourc = Pool().get('investment.source') - - result = OSourc.read_from_website( - self, - isin = self.isin, - nsin = self.nsin, - symbol = self.symbol, - debug=True, - ) - self.text = result.get('text', None) - @fields.depends('nsin', 'isin', 'symbol', 'text') def on_change_nsin(self): """ run request @@ -108,15 +93,6 @@ class OnlineSource(ModelSQL, ModelView): """ return '' - def get_url_with_parameter(self, isin=None, nsin=None, symbol=None): - """ generate url - """ - return Template(self.url).substitute({ - 'isin': isin if isin is not None else '', - 'nsin': nsin if nsin is not None else '', - 'symbol': symbol if symbol is not None else '', - }) - @fields.depends('url', 'isin', 'nsin', 'symbol') def on_change_with_used_url(self, name=None): """ get url for testing @@ -134,9 +110,33 @@ class OnlineSource(ModelSQL, ModelView): """ pass + def call_online_source(self): + """ use updated values to call online-source, + for testing parameters + """ + OSourc = Pool().get('investment.source') + + result = OSourc.read_from_website( + self, + isin = self.isin, + nsin = self.nsin, + symbol = self.symbol, + debug=True, + ) + self.text = result.get('text', None) + + def get_url_with_parameter(self, isin=None, nsin=None, symbol=None): + """ generate url + """ + return Template(self.url).substitute({ + 'isin': isin if isin is not None else '', + 'nsin': nsin if nsin is not None else '', + 'symbol': symbol if symbol is not None else '', + }) + @classmethod def update_rate(cls, asset): - """ read data from inet + """ read data from inet, write result to rates of asset """ if asset.updtsource is None: return @@ -163,7 +163,7 @@ class OnlineSource(ModelSQL, ModelView): if len(rgxcode) == 0: print('-- get_regex_result: stop 1') return None - result = re.compile(rgxcode).match(html_text) + result = re.compile(rgxcode).search(html_text) if result is None: print('-- get_regex_result: stop 2') return None @@ -185,10 +185,10 @@ class OnlineSource(ModelSQL, ModelView): allow_redirects=True, timeout=5.0) - if res1.status_code in [200, 204, 404]: + if res1.status_code in [200, 204]: html = cls.cleanup_spaces(res1.text) - # remove html + # remove html-tags if updtsource.nohtml: o1 = html2text.HTML2Text() o1.ignore_links = True @@ -206,7 +206,11 @@ class OnlineSource(ModelSQL, ModelView): print('\n## result:', result) else : - logger.error('read_from_website: %s' % res1.text) + logger.error('read_from_website: %(code)s, url: %(url)s, redirects: [%(redirects)s]' % { + 'code': res1.status_code, + 'url': res1.url, + 'redirects': ', '.join([x.url for x in res1.history]), + }) if debug: result['text'] = res1.text return result