From 2b80f22897bfbbdff3476712dae992a2482a912e Mon Sep 17 00:00:00 2001 From: Frederik Jaeckel Date: Sun, 20 Nov 2022 20:40:51 +0100 Subject: [PATCH] abruf weiter gebaut --- locale/de.po | 60 ++++++++++++++++++++++++++++++ onlinesource.py | 88 +++++++++++++++++++++++++++++++++++++++----- view/source_form.xml | 51 +++++++++++++++++-------- 3 files changed, 174 insertions(+), 25 deletions(-) diff --git a/locale/de.po b/locale/de.po index c42db17..e208a55 100644 --- a/locale/de.po +++ b/locale/de.po @@ -198,6 +198,18 @@ msgctxt "view:investment.source:" msgid "Test parameters" msgstr "Testparameter" +msgctxt "view:investment.source:" +msgid "URL parameter placeholders: ${isin}, ${nsin}, ${symbol}" +msgstr "Platzhalter für URL-Parameter: ${isin}, ${nsin}, ${symbol}" + +msgctxt "view:investment.source:" +msgid "Regular expressions to find data" +msgstr "Reguläre Ausdrücke zum Finden der Daten" + +msgctxt "view:investment.source:" +msgid "How to" +msgstr "So funktionierts" + msgctxt "view:investment.source:" msgid "Configure a source for receiving course data here. The source is queried with the parameters according to schedule." msgstr "Konfigurieren Sie hier eine Quelle für den Empfang von Kursdaten. Die Quelle wird mit den Paramtern nach Zeitplan abgefragt." @@ -242,6 +254,54 @@ msgctxt "help:investment.source,nohtml:" msgid "Removes HTML tags before the text is interpreted." msgstr "Entfernt HTML-Tags bevor der Text interpretiert wird." +msgctxt "field:investment.source,rgxdate:" +msgid "Date" +msgstr "Datum" + +msgctxt "help:investment.source,rgxdate:" +msgid "Regex code to find the date in the downloaded HTML file." +msgstr "Regex-Code, um das Datum in der heruntergeladenen HTML-Datei zu finden." + +msgctxt "field:investment.source,rgxrate:" +msgid "Rate" +msgstr "Kurs" + +msgctxt "help:investment.source,rgxrate:" +msgid "Regex code to find the rate in the downloaded HTML file." +msgstr "Regex-Code, um den Kurs in der heruntergeladenen HTML-Datei zu finden." 
+ +msgctxt "field:investment.source,rgxisin:" +msgid "ISIN" +msgstr "ISIN" + +msgctxt "help:investment.source,rgxisin:" +msgid "Regex code to find the ISIN in the downloaded HTML file." +msgstr "Regex-Code, um die ISIN in der heruntergeladenen HTML-Datei zu finden." + +msgctxt "field:investment.source,rgxnsin:" +msgid "NSIN" +msgstr "WKN" + +msgctxt "help:investment.source,rgxnsin:" +msgid "Regex code to find the NSIN in the downloaded HTML file." +msgstr "Regex-Code, um die WKN in der heruntergeladenen HTML-Datei zu finden." + +msgctxt "field:investment.source,rgxsymbol:" +msgid "Symbol" +msgstr "Symbol" + +msgctxt "help:investment.source,rgxsymbol:" +msgid "Regex code to find the symbol in the downloaded HTML file." +msgstr "Regex-Code, um das Symbol in der heruntergeladenen HTML-Datei zu finden." + +msgctxt "field:investment.source,used_url:" +msgid "Used URL" +msgstr "verwendete URL" + +msgctxt "help:investment.source,used_url:" +msgid "This URL is used to retrieve the HTML file." +msgstr "Diese URL wird für den Abruf der HTML-Datei verwendet." + ################### # investment.rate # diff --git a/onlinesource.py b/onlinesource.py index 20ffb1f..e2afa7c 100644 --- a/onlinesource.py +++ b/onlinesource.py @@ -3,7 +3,8 @@ # The COPYRIGHT file at the top level of this repository contains the # full copyright notices and license terms. 
-import requests, logging, html2text +from string import Template +import requests, logging, html2text, re from trytond.model import ModelView, ModelSQL, fields, Unique, Check from trytond.transaction import Transaction from trytond.pool import Pool @@ -18,8 +19,22 @@ class OnlineSource(ModelSQL, ModelView): url = fields.Char(string='URL', required=True) nohtml = fields.Boolean(string='Remove HTML', help='Removes HTML tags before the text is interpreted.') + rgxdate = fields.Char(string='Date', required=True, + help='Regex code to find the date in the downloaded HTML file.') + rgxrate = fields.Char(string='Rate', required=True, + help='Regex code to find the rate in the downloaded HTML file.') + rgxisin = fields.Char(string='ISIN', + help='Regex code to find the ISIN in the downloaded HTML file.') + rgxnsin = fields.Char(string='NSIN', + help='Regex code to find the NSIN in the downloaded HTML file.') + rgxsymbol = fields.Char(string='Symbol', + help='Regex code to find the symbol in the downloaded HTML file.') + # field to test requests + used_url = fields.Function(fields.Char(string='Used URL', readonly=True, + help='This URL is used to retrieve the HTML file.'), + 'on_change_with_used_url') nsin = fields.Function(fields.Char(string='NSIN'), 'on_change_with_nsin', setter='set_test_value') isin = fields.Function(fields.Char(string='ISIN'), @@ -29,13 +44,11 @@ class OnlineSource(ModelSQL, ModelView): text = fields.Function(fields.Text(string='Result', readonly=True), 'on_change_with_text') - def call_online_source(self): - """ use updated values to call online-source + @classmethod + def default_rgxdate(cls): + """ code to find date: dd.mm.yyyy """ - OSourc = Pool().get('investment.source') - - result = OSourc.read_from_website(self, debug=True) - self.text = result.get('text', None) + return '(\d{2}\.\d{2}\.\d{4})' @classmethod def default_nohtml(cls): @@ -43,6 +56,14 @@ class OnlineSource(ModelSQL, ModelView): """ return True + def call_online_source(self): + """ 
use updated values to call online-source + """ + OSourc = Pool().get('investment.source') + + result = OSourc.read_from_website(self, debug=False) + self.text = result.get('text', None) + @fields.depends('nsin', 'isin', 'symbol', 'text') def on_change_nsin(self): """ run request @@ -81,6 +102,26 @@ class OnlineSource(ModelSQL, ModelView): """ return '' + def get_url_with_parameter(self, isin=None, nsin=None, symbol=None): + """ generate url + """ + return Template(self.url).substitute({ + 'isin': isin if isin is not None else '', + 'nsin': nsin if nsin is not None else '', + 'symbol': symbol if symbol is not None else '', + }) + + @fields.depends('url', 'isin', 'nsin', 'symbol') + def on_change_with_used_url(self, name=None): + """ get url for testing + """ + if self.url: + return self.get_url_with_parameter( + isin = self.isin, + nsin = self.nsin, + symbol = self.symbol, + ) + @classmethod def set_test_value(cls, record, name, value): """ dont store it @@ -107,14 +148,33 @@ class OnlineSource(ModelSQL, ModelView): text = text.replace('\n\r', '\n').replace('\n\n', '\n') return text + @classmethod + def get_regex_result(cls, html_text, rgxcode): + """ run regex on html-text + """ + print('\n## get_regex_result:', rgxcode, type(rgxcode)) + rgxcode = rgxcode or '' + if len(rgxcode) == 0: + return None + result = re.compile(rgxcode).match(html_text) + if result is None: + return None + print('-- result:', result, result.group()) + return result + @classmethod def read_from_website(cls, updtsource, debug=False): """ read from url, extract values """ result = {} - res1 = requests.get(updtsource.url) - if res1.reason == 'OK': + res1 = requests.get( + updtsource.url, + allow_redirects=True, + timeout=5.0) + + print('-- res1:', res1.history) + if res1.status_code in [200, 204, 404]: html = cls.cleanup_spaces(res1.text) # remove html @@ -126,8 +186,16 @@ class OnlineSource(ModelSQL, ModelView): if debug: result['text'] = html + + result['rate'] = cls.get_regex_result(html, 
updtsource.rgxrate) + result['date'] = cls.get_regex_result(html, updtsource.rgxdate) + result['isin'] = cls.get_regex_result(html, updtsource.rgxisin) + result['nsin'] = cls.get_regex_result(html, updtsource.rgxnsin) + result['symbol'] = cls.get_regex_result(html, updtsource.rgxsymbol) + + print('\n## result:', result) else : - logger.error('read_from_website: %s' % res1.text) + #logger.error('read_from_website: %s' % res1.text) if debug: result['text'] = res1.text return result diff --git a/view/source_form.xml b/view/source_form.xml index 9ab1bb4..b99da97 100644 --- a/view/source_form.xml +++ b/view/source_form.xml @@ -11,23 +11,44 @@ full copyright notices and license terms. -->