From b57c69abf019fccd49f7962cd8529a5995ff69ef Mon Sep 17 00:00:00 2001 From: Frederik Jaeckel Date: Mon, 21 Nov 2022 23:12:26 +0100 Subject: [PATCH] form optimiert, regex-konvertierung ok + test --- locale/de.po | 68 +++++++++++------ locale/en.po | 84 +++++++++++++++++++++ onlinesource.py | 176 ++++++++++++++++++++++++++++++------------- tests/__init__.py | 2 + tests/test_source.py | 56 ++++++++++++++ view/source_form.xml | 26 ++++--- 6 files changed, 328 insertions(+), 84 deletions(-) create mode 100644 tests/test_source.py diff --git a/locale/de.po b/locale/de.po index e208a55..1c67702 100644 --- a/locale/de.po +++ b/locale/de.po @@ -238,6 +238,10 @@ msgctxt "field:investment.source,isin:" msgid "ISIN" msgstr "ISIN" +msgctxt "field:investment.source,http_state:" +msgid "HTTP-State" +msgstr "HTTP-Status" + msgctxt "field:investment.source,symbol:" msgid "Symbol" msgstr "Symbol" @@ -270,29 +274,13 @@ msgctxt "help:investment.source,rgxrate:" msgid "Regex code to find the rate in the downloaded HTML file." msgstr "Regex-Code, um den Kurs in der heruntergeladenen HTML-Datei zu finden." -msgctxt "field:investment.source,rgxisin:" -msgid "ISIN" -msgstr "ISIN" +msgctxt "field:investment.source,rgxident:" +msgid "Identifier" +msgstr "Bezeichner" -msgctxt "help:investment.source,rgxisin:" -msgid "Regex code to find the ISIN in the downloaded HTML file." -msgstr "Regex-Code, um die ISIN in der heruntergeladenen HTML-Datei zu finden." - -msgctxt "field:investment.source,rgxnsin:" -msgid "NSIN" -msgstr "WKN" - -msgctxt "help:investment.source,rgxnsin:" -msgid "Regex code to find the NSIN in the downloaded HTML file." -msgstr "Regex-Code, um die WKN in der heruntergeladenen HTML-Datei zu finden." - -msgctxt "field:investment.source,rgxsymbol:" -msgid "Symbol" -msgstr "Symbol" - -msgctxt "help:investment.source,rgxsymbol:" -msgid "Regex code to find the Symbol in the downloaded HTML file." -msgstr "Regex-Code, um das Symbol in der heruntergeladenen HTML-Datei zu finden." 
+msgctxt "help:investment.source,rgxident:" +msgid "Regex code to find the identifier in the downloaded HTML file." +msgstr "Regex-Code, um den Bezeichner in der heruntergeladenen HTML-Datei zu finden." msgctxt "field:investment.source,used_url:" msgid "Used URL" @@ -302,6 +290,42 @@ msgctxt "help:investment.source,used_url:" msgid "This URL is used to retrieve the HTML file." msgstr "Diese URL wird für den Abruf der HTML-Datei verwendet." +msgctxt "field:investment.source,rgxdecimal:" +msgid "Decimal Separator" +msgstr "Dezimaltrenner" + +msgctxt "help:investment.source,rgxdecimal:" +msgid "Decimal separator for converting the market value into a number." +msgstr "Dezimaltrenner für die Umwandlung des Kurswertes in eine Zahl." + +msgctxt "field:investment.source,rgxidtype:" +msgid "ID-Type" +msgstr "ID-Typ" + +msgctxt "help:investment.source,rgxidtype:" +msgid "Type of identifier used to validate the result." +msgstr "Typ des Bezeichners zur Validierung des Ergebnisses." + +msgctxt "selection:investment.source,rgxidtype:" +msgid "ISIN" +msgstr "ISIN" + +msgctxt "selection:investment.source,rgxidtype:" +msgid "NSIN" +msgstr "WKN" + +msgctxt "selection:investment.source,rgxidtype:" +msgid "Symbol" +msgstr "Symbol" + +msgctxt "field:investment.source,rgxdatefmt:" +msgid "Date format" +msgstr "Datumsformat" + +msgctxt "help:investment.source,rgxidtype:" +msgid "Type of identifier used to validate the result." +msgstr "Typ des Bezeichners zur Validierung des Ergebnisses." 
+ ################### # investment.rate # diff --git a/locale/en.po b/locale/en.po index 61fe75a..41f953d 100644 --- a/locale/en.po +++ b/locale/en.po @@ -166,6 +166,18 @@ msgctxt "view:investment.source:" msgid "Test parameters" msgstr "Test parameters" +msgctxt "view:investment.source:" +msgid "URL parameter placeholders: ${isin}, ${nsin}, ${symbol}" +msgstr "URL parameter placeholders: ${isin}, ${nsin}, ${symbol}" + +msgctxt "view:investment.source:" +msgid "Regular expressions to find data" +msgstr "Regular expressions to find data" + +msgctxt "view:investment.source:" +msgid "How to" +msgstr "How to" + msgctxt "view:investment.source:" msgid "Configure a source for receiving course data here. The source is queried with the parameters according to schedule." msgstr "Configure a source for receiving course data here. The source is queried with the parameters according to schedule." @@ -194,6 +206,10 @@ msgctxt "field:investment.source,isin:" msgid "ISIN" msgstr "ISIN" +msgctxt "field:investment.source,http_state:" +msgid "HTTP-State" +msgstr "HTTP-State" + msgctxt "field:investment.source,symbol:" msgid "Symbol" msgstr "Symbol" @@ -210,6 +226,74 @@ msgctxt "help:investment.source,nohtml:" msgid "Removes HTML tags before the text is interpreted." msgstr "Removes HTML tags before the text is interpreted." +msgctxt "field:investment.source,rgxdate:" +msgid "Date" +msgstr "Date" + +msgctxt "help:investment.source,rgxdate:" +msgid "Regex code to find the date in the downloaded HTML file." +msgstr "Regex code to find the date in the downloaded HTML file." + +msgctxt "field:investment.source,rgxrate:" +msgid "Rate" +msgstr "Rate" + +msgctxt "help:investment.source,rgxrate:" +msgid "Regex code to find the rate in the downloaded HTML file." +msgstr "Regex code to find the rate in the downloaded HTML file." 
+ +msgctxt "field:investment.source,rgxident:" +msgid "Identifier" +msgstr "Identifier" + +msgctxt "help:investment.source,rgxident:" +msgid "Regex code to find the identifier in the downloaded HTML file." +msgstr "Regex code to find the identifier in the downloaded HTML file." + +msgctxt "field:investment.source,used_url:" +msgid "Used URL" +msgstr "Used URL" + +msgctxt "help:investment.source,used_url:" +msgid "This URL is used to retrieve the HTML file." +msgstr "This URL is used to retrieve the HTML file." + +msgctxt "field:investment.source,rgxdecimal:" +msgid "Decimal Separator" +msgstr "Decimal Separator" + +msgctxt "help:investment.source,rgxdecimal:" +msgid "Decimal separator for converting the market value into a number." +msgstr "Decimal separator for converting the market value into a number." + +msgctxt "field:investment.source,rgxidtype:" +msgid "ID-Type" +msgstr "ID-Type" + +msgctxt "help:investment.source,rgxidtype:" +msgid "Type of identifier used to validate the result." +msgstr "Type of identifier used to validate the result." + +msgctxt "selection:investment.source,rgxidtype:" +msgid "ISIN" +msgstr "ISIN" + +msgctxt "selection:investment.source,rgxidtype:" +msgid "NSIN" +msgstr "NSIN" + +msgctxt "selection:investment.source,rgxidtype:" +msgid "Symbol" +msgstr "Symbol" + +msgctxt "field:investment.source,rgxdatefmt:" +msgid "Date format" +msgstr "Date format" + +msgctxt "help:investment.source,rgxidtype:" +msgid "Type of identifier used to validate the result." +msgstr "Type of identifier used to validate the result." 
+ msgctxt "model:investment.rate,name:" msgid "Rate" msgstr "Rate" diff --git a/onlinesource.py b/onlinesource.py index 797f892..23d49b8 100644 --- a/onlinesource.py +++ b/onlinesource.py @@ -5,12 +5,36 @@ from string import Template import requests, logging, html2text, re +from datetime import datetime +from decimal import Decimal from trytond.model import ModelView, ModelSQL, fields, Unique, Check from trytond.transaction import Transaction from trytond.pool import Pool +from trytond.pyson import Eval, Bool logger = logging.getLogger(__name__) +sel_rgxdecimal = [ + ('.', '.'), + (',', ','), + ] + + +sel_rgxidtype = [ + ('isin', 'ISIN'), + ('nsin', 'NSIN'), + ('symbol', 'Symbol'), + ] + +sel_rgxdatefmt = [ + ('%d.%m.%Y', 'dd.mm.yyyy'), + ('%m/%d/%Y', 'mm/dd/yyyy'), + ('%Y-%m-%d', 'yyyy-mm-dd'), + ] + +fields_check = ['url', 'nsin', 'isin', 'symbol', 'text', 'http_state'] + + class OnlineSource(ModelSQL, ModelView): 'Online Source' __name__ = 'investment.source' @@ -21,14 +45,20 @@ class OnlineSource(ModelSQL, ModelView): help='Removes HTML tags before the text is interpreted.') rgxdate = fields.Char(string='Date', required=True, help='Regex code to find the date in the downloaded HTML file.') + rgxdatefmt = fields.Selection(string='Date format', required=True, + selection=sel_rgxdatefmt) rgxrate = fields.Char(string='Rate', required=True, help='Regex code to find the rate in the downloaded HTML file.') - rgxisin = fields.Char(string='ISIN', - help='Regex code to find the ISIN in the downloaded HTML file.') - rgxnsin = fields.Char(string='NSIN', - help='Regex code to find the NSIN in the downloaded HTML file.') - rgxsymbol = fields.Char(string='Symbol', - help='Regex code to find the symbol in the downloaded HTML file.') + rgxdecimal = fields.Selection(string='Decimal Separator', required=True, + help='Decimal separator for converting the market value into a number.', + selection=sel_rgxdecimal) + rgxident = fields.Char(string='Identifier', + help='Regex code to 
find the identifier in the downloaded HTML file.') + rgxidtype = fields.Selection(string='ID-Type', selection=sel_rgxidtype, + help='Type of identifier used to validate the result.', + states={ + 'required': Bool(Eval('rgxident', '')), + }, depends=['rgxident']) # field to test requests used_url = fields.Function(fields.Char(string='Used URL', readonly=True, @@ -40,57 +70,84 @@ class OnlineSource(ModelSQL, ModelView): 'on_change_with_isin', setter='set_test_value') symbol = fields.Function(fields.Char(string='Symbol'), 'on_change_with_symbol', setter='set_test_value') + http_state = fields.Function(fields.Char(string='HTTP-State', + readonly=True), 'on_change_with_http_state') text = fields.Function(fields.Text(string='Result', readonly=True), 'on_change_with_text') + @classmethod + def default_url(cls): + """ default-url + """ + return 'https://' + @classmethod def default_rgxdate(cls): """ code to find date: dd.mm.yyyy """ return '(\\d{2}\\.\\d{2}\\.\\d{4})' + @classmethod + def default_rgxdatefmt(cls): + """ dd.mm.yyyy + """ + return '%d.%m.%Y' + + @classmethod + def default_rgxrate(cls): + """ nn,nn + """ + return '(\\d+,\\d+)' + + @classmethod + def default_rgxidtype(cls): + """ isin + """ + return 'isin' + + @classmethod + def default_rgxdecimal(cls): + """ comma + """ + return ',' + @classmethod def default_nohtml(cls): """ default: True """ return True - @fields.depends('nsin', 'isin', 'symbol', 'text') + @fields.depends(*fields_check) def on_change_nsin(self): """ run request """ self.call_online_source() - @fields.depends('nsin', 'isin', 'symbol', 'text') + @fields.depends(*fields_check) def on_change_isin(self): """ run request """ self.call_online_source() - @fields.depends('nsin', 'isin', 'symbol', 'text') + @fields.depends(*fields_check) def on_change_symbol(self): """ run request """ self.call_online_source() + def on_change_with_http_state(self, name=None): + return '' + def on_change_with_text(self, name=None): - """ return existing value - """ 
return '' def on_change_with_nsin(self, name=None): - """ return existing value - """ return '' def on_change_with_isin(self, name=None): - """ return existing value - """ return '' def on_change_with_symbol(self, name=None): - """ return existing value - """ return '' @fields.depends('url', 'isin', 'nsin', 'symbol') @@ -121,18 +178,20 @@ class OnlineSource(ModelSQL, ModelView): isin = self.isin, nsin = self.nsin, symbol = self.symbol, - debug=True, + debug = True, ) self.text = result.get('text', None) + self.http_state = result.get('http_state', None) def get_url_with_parameter(self, isin=None, nsin=None, symbol=None): """ generate url """ - return Template(self.url).substitute({ - 'isin': isin if isin is not None else '', - 'nsin': nsin if nsin is not None else '', - 'symbol': symbol if symbol is not None else '', - }) + if self.url: + return Template(self.url).substitute({ + 'isin': isin if isin is not None else '', + 'nsin': nsin if nsin is not None else '', + 'symbol': symbol if symbol is not None else '', + }) @classmethod def update_rate(cls, asset): @@ -140,34 +199,38 @@ class OnlineSource(ModelSQL, ModelView): """ if asset.updtsource is None: return - rate_data = cls.read_from_website(asset.updtsource) - @classmethod - def cleanup_spaces(cls, text): - """ remove multiple spaces + def get_regex_result(self, html_text, field_name): + """ run regex on html-text, convert result """ - len1 = -1 - while len1 != len(text): - len1 = len(text) - text = text.replace('\t', ' ').replace(' ', ' ') - text = text.replace('\n\r', '\n').replace('\n\n', '\n') - return text - - @classmethod - def get_regex_result(cls, html_text, rgxcode): - """ run regex on html-text - """ - print('\n## get_regex_result:', rgxcode, type(rgxcode)) - rgxcode = rgxcode or '' + rgxcode = getattr(self, field_name) or '' if len(rgxcode) == 0: - print('-- get_regex_result: stop 1') return None - result = re.compile(rgxcode).search(html_text) - if result is None: - print('-- get_regex_result: stop 
2') + + search_result = re.compile(rgxcode).search(html_text) + if search_result is None: return None - print('-- get_regex_result - result:', result, result.group()) + + try : + result = search_result.group(1) + except IndexError: + result = search_result.group(0) + + if field_name == 'rgxrate': + dec_sep = [',', '.'] + dec_sep.remove(self.rgxdecimal) + + result = result.replace(dec_sep[0], '').replace(self.rgxdecimal, '.') + try : + result = Decimal(result) + except : + result = None + elif field_name == 'rgxdate': + try : + result = datetime.strptime(result, self.rgxdatefmt).date() + except : + result = None return result @classmethod @@ -176,6 +239,10 @@ class OnlineSource(ModelSQL, ModelView): """ result = {} + if updtsource.url == 'https://': + result['text'] = 'invalid url' + return result + res1 = requests.get( updtsource.get_url_with_parameter( isin = isin, @@ -185,24 +252,31 @@ class OnlineSource(ModelSQL, ModelView): allow_redirects=True, timeout=5.0) + result['http_state'] = '%(code)d: %(msg)s' % { + 'code': res1.status_code, + 'msg': res1.reason, + } + if res1.status_code in [200, 204]: - html = cls.cleanup_spaces(res1.text) + html = res1.text # remove html-tags if updtsource.nohtml: o1 = html2text.HTML2Text() o1.ignore_links = True + o1.ignore_tables = True + o1.bypass_tables = False + o1.single_line_break = True + o1.body_width = 0 html = o1.handle(html) del o1 if debug: result['text'] = html - result['rate'] = cls.get_regex_result(html, updtsource.rgxrate) - result['date'] = cls.get_regex_result(html, updtsource.rgxdate) - result['isin'] = cls.get_regex_result(html, updtsource.rgxisin) - result['nsin'] = cls.get_regex_result(html, updtsource.rgxnsin) - result['symbol'] = cls.get_regex_result(html, updtsource.rgxsymbol) + result['rate'] = updtsource.get_regex_result(html, 'rgxrate') + result['date'] = updtsource.get_regex_result(html, 'rgxdate') + result['code'] = updtsource.get_regex_result(html, 'rgxident') print('\n## result:', result) else : diff 
--git a/tests/__init__.py b/tests/__init__.py index 72793e9..1d46b3f 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -6,12 +6,14 @@ import unittest from trytond.modules.investment.tests.test_asset import AssetTestCase from trytond.modules.investment.tests.test_rate import RateTestCase +from trytond.modules.investment.tests.test_source import SourceTestCase __all__ = ['suite'] class InvestmentTestCase(\ + SourceTestCase, \ RateTestCase,\ AssetTestCase,\ ): diff --git a/tests/test_source.py b/tests/test_source.py new file mode 100644 index 0000000..6468eed --- /dev/null +++ b/tests/test_source.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +# This file is part of the investment-module from m-ds for Tryton. +# The COPYRIGHT file at the top level of this repository contains the +# full copyright notices and license terms. + +from trytond.tests.test_tryton import ModuleTestCase, with_transaction +from trytond.pool import Pool +from trytond.modules.company.tests import create_company +from trytond.transaction import Transaction +from decimal import Decimal +from datetime import time, date + + +class SourceTestCase(ModuleTestCase): + 'Test online source module' + module = 'investment' + + @with_transaction() + def test_waitlist_source_check_regex(self): + """ create source, check convert + """ + pool = Pool() + OSource = pool.get('investment.source') + + osource, = OSource.create([{ + 'name': 'Source 1', + 'rgxdate': 'Course Date (\\d+.\\d+.\\d+) Today', + 'rgxdatefmt': '%d.%m.%Y', + 'rgxrate': 'High (\\d+,\\d+) EUR', + 'rgxdecimal': ',', + }]) + self.assertEqual(osource.rec_name, 'Source 1') + self.assertEqual(osource.get_regex_result( + 'The Course Date 14.03.2022 Today, High 13,43 EUR', + 'rgxdate' + ), date(2022, 3, 14)) + + self.assertEqual(osource.get_regex_result( + 'The Course Date 14.03.2022 Today, High 13,43 EUR', + 'rgxrate' + ), Decimal('13.43')) + + # iso-date + OSource.write(*[ + [osource], + { + 'rgxdate': 'Course Date (\\d+-\\d+-\\d+) Today', + 
'rgxdatefmt': '%Y-%m-%d', + }]) + self.assertEqual(osource.get_regex_result( + 'The Course Date 2022-03-14 Today, High 13,43 EUR', + 'rgxdate' + ), date(2022, 3, 14)) + + +# end SourceTestCase diff --git a/view/source_form.xml b/view/source_form.xml index b99da97..bd69730 100644 --- a/view/source_form.xml +++ b/view/source_form.xml @@ -5,32 +5,36 @@ full copyright notices and license terms. -->