form optimiert, regex-konvertierung ok + test

This commit is contained in:
Frederik Jaeckel 2022-11-21 23:12:26 +01:00
parent 517746b4e9
commit b57c69abf0
6 changed files with 328 additions and 84 deletions

View file

@ -238,6 +238,10 @@ msgctxt "field:investment.source,isin:"
msgid "ISIN" msgid "ISIN"
msgstr "ISIN" msgstr "ISIN"
msgctxt "field:investment.source,http_state:"
msgid "HTTP-State"
msgstr "HTTP-Status"
msgctxt "field:investment.source,symbol:" msgctxt "field:investment.source,symbol:"
msgid "Symbol" msgid "Symbol"
msgstr "Symbol" msgstr "Symbol"
@ -270,29 +274,13 @@ msgctxt "help:investment.source,rgxrate:"
msgid "Regex code to find the rate in the downloaded HTML file." msgid "Regex code to find the rate in the downloaded HTML file."
msgstr "Regex-Code, um den Kurs in der heruntergeladenen HTML-Datei zu finden." msgstr "Regex-Code, um den Kurs in der heruntergeladenen HTML-Datei zu finden."
msgctxt "field:investment.source,rgxisin:" msgctxt "field:investment.source,rgxident:"
msgid "ISIN" msgid "Identifier"
msgstr "ISIN" msgstr "Bezeichner"
msgctxt "help:investment.source,rgxisin:" msgctxt "help:investment.source,rgxident:"
msgid "Regex code to find the ISIN in the downloaded HTML file." msgid "Regex code to find the identifier in the downloaded HTML file."
msgstr "Regex-Code, um die ISIN in der heruntergeladenen HTML-Datei zu finden." msgstr "Regex-Code, um den Bezeichner in der heruntergeladenen HTML-Datei zu finden."
msgctxt "field:investment.source,rgxnsin:"
msgid "NSIN"
msgstr "WKN"
msgctxt "help:investment.source,rgxnsin:"
msgid "Regex code to find the NSIN in the downloaded HTML file."
msgstr "Regex-Code, um die WKN in der heruntergeladenen HTML-Datei zu finden."
msgctxt "field:investment.source,rgxsymbol:"
msgid "Symbol"
msgstr "Symbol"
msgctxt "help:investment.source,rgxsymbol:"
msgid "Regex code to find the Symbol in the downloaded HTML file."
msgstr "Regex-Code, um das Symbol in der heruntergeladenen HTML-Datei zu finden."
msgctxt "field:investment.source,used_url:" msgctxt "field:investment.source,used_url:"
msgid "Used URL" msgid "Used URL"
@ -302,6 +290,42 @@ msgctxt "help:investment.source,used_url:"
msgid "This URL is used to retrieve the HTML file." msgid "This URL is used to retrieve the HTML file."
msgstr "Diese URL wird für den Abruf der HTML-Datei verwendet." msgstr "Diese URL wird für den Abruf der HTML-Datei verwendet."
msgctxt "field:investment.source,rgxdecimal:"
msgid "Decimal Separator"
msgstr "Dezimaltrenner"
msgctxt "help:investment.source,rgxdecimal:"
msgid "Decimal separator for converting the market value into a number."
msgstr "Dezimaltrenner für die Umwandlung des Kurswertes in eine Zahl."
msgctxt "field:investment.source,rgxidtype:"
msgid "ID-Type"
msgstr "ID-Typ"
msgctxt "help:investment.source,rgxidtype:"
msgid "Type of identifier used to validate the result."
msgstr "Typ des Bezeichners zur Validierung des Ergebnisses."
msgctxt "selection:investment.source,rgxidtype:"
msgid "ISIN"
msgstr "ISIN"
msgctxt "selection:investment.source,rgxidtype:"
msgid "NSIN"
msgstr "WKN"
msgctxt "selection:investment.source,rgxidtype:"
msgid "Symbol"
msgstr "Symbol"
msgctxt "field:investment.source,rgxdatefmt:"
msgid "Date format"
msgstr "Datumsformat"
################### ###################
# investment.rate # # investment.rate #

View file

@ -166,6 +166,18 @@ msgctxt "view:investment.source:"
msgid "Test parameters" msgid "Test parameters"
msgstr "Test parameters" msgstr "Test parameters"
msgctxt "view:investment.source:"
msgid "URL parameter placeholders: ${isin}, ${nsin}, ${symbol}"
msgstr "URL parameter placeholders: ${isin}, ${nsin}, ${symbol}"
msgctxt "view:investment.source:"
msgid "Regular expressions to find data"
msgstr "Regular expressions to find data"
msgctxt "view:investment.source:"
msgid "How to"
msgstr "How to"
msgctxt "view:investment.source:" msgctxt "view:investment.source:"
msgid "Configure a source for receiving course data here. The source is queried with the parameters according to schedule." msgid "Configure a source for receiving course data here. The source is queried with the parameters according to schedule."
msgstr "Configure a source for receiving course data here. The source is queried with the parameters according to schedule." msgstr "Configure a source for receiving course data here. The source is queried with the parameters according to schedule."
@ -194,6 +206,10 @@ msgctxt "field:investment.source,isin:"
msgid "ISIN" msgid "ISIN"
msgstr "ISIN" msgstr "ISIN"
msgctxt "field:investment.source,http_state:"
msgid "HTTP-State"
msgstr "HTTP-State"
msgctxt "field:investment.source,symbol:" msgctxt "field:investment.source,symbol:"
msgid "Symbol" msgid "Symbol"
msgstr "Symbol" msgstr "Symbol"
@ -210,6 +226,74 @@ msgctxt "help:investment.source,nohtml:"
msgid "Removes HTML tags before the text is interpreted." msgid "Removes HTML tags before the text is interpreted."
msgstr "Removes HTML tags before the text is interpreted." msgstr "Removes HTML tags before the text is interpreted."
msgctxt "field:investment.source,rgxdate:"
msgid "Date"
msgstr "Date"
msgctxt "help:investment.source,rgxdate:"
msgid "Regex code to find the date in the downloaded HTML file."
msgstr "Regex code to find the date in the downloaded HTML file."
msgctxt "field:investment.source,rgxrate:"
msgid "Rate"
msgstr "Rate"
msgctxt "help:investment.source,rgxrate:"
msgid "Regex code to find the rate in the downloaded HTML file."
msgstr "Regex code to find the rate in the downloaded HTML file."
msgctxt "field:investment.source,rgxident:"
msgid "Identifier"
msgstr "Identifier"
msgctxt "help:investment.source,rgxident:"
msgid "Regex code to find the identifier in the downloaded HTML file."
msgstr "Regex code to find the identifier in the downloaded HTML file."
msgctxt "field:investment.source,used_url:"
msgid "Used URL"
msgstr "Used URL"
msgctxt "help:investment.source,used_url:"
msgid "This URL is used to retrieve the HTML file."
msgstr "This URL is used to retrieve the HTML file."
msgctxt "field:investment.source,rgxdecimal:"
msgid "Decimal Separator"
msgstr "Decimal Separator"
msgctxt "help:investment.source,rgxdecimal:"
msgid "Decimal separator for converting the market value into a number."
msgstr "Decimal separator for converting the market value into a number."
msgctxt "field:investment.source,rgxidtype:"
msgid "ID-Type"
msgstr "ID-Type"
msgctxt "help:investment.source,rgxidtype:"
msgid "Type of identifier used to validate the result."
msgstr "Type of identifier used to validate the result."
msgctxt "selection:investment.source,rgxidtype:"
msgid "ISIN"
msgstr "ISIN"
msgctxt "selection:investment.source,rgxidtype:"
msgid "NSIN"
msgstr "NSIN"
msgctxt "selection:investment.source,rgxidtype:"
msgid "Symbol"
msgstr "Symbol"
msgctxt "field:investment.source,rgxdatefmt:"
msgid "Date format"
msgstr "Date format"
msgctxt "model:investment.rate,name:" msgctxt "model:investment.rate,name:"
msgid "Rate" msgid "Rate"
msgstr "Rate" msgstr "Rate"

View file

@ -5,12 +5,36 @@
from string import Template from string import Template
import requests, logging, html2text, re import requests, logging, html2text, re
from datetime import datetime
from decimal import Decimal
from trytond.model import ModelView, ModelSQL, fields, Unique, Check from trytond.model import ModelView, ModelSQL, fields, Unique, Check
from trytond.transaction import Transaction from trytond.transaction import Transaction
from trytond.pool import Pool from trytond.pool import Pool
from trytond.pyson import Eval, Bool
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Choices for the 'rgxdecimal' selection field: the character that acts as
# decimal separator in the rate text found by the regex.
sel_rgxdecimal = [
('.', '.'),
(',', ','),
]
# Choices for the 'rgxidtype' selection field: (stored key, label) pairs
# naming the kind of identifier the 'rgxident' regex is expected to find.
sel_rgxidtype = [
('isin', 'ISIN'),
('nsin', 'NSIN'),
('symbol', 'Symbol'),
]
# Choices for the 'rgxdatefmt' selection field: the stored key is the
# format string handed to datetime.strptime() in get_regex_result(),
# the second item is the label shown to the user.
sel_rgxdatefmt = [
('%d.%m.%Y', 'dd.mm.yyyy'),
('%m/%d/%Y', 'mm/dd/yyyy'),
('%Y-%m-%d', 'yyyy-mm-dd'),
]
# Field names passed to @fields.depends() by the on_change_nsin/isin/symbol
# handlers, which all trigger call_online_source().
fields_check = ['url', 'nsin', 'isin', 'symbol', 'text', 'http_state']
class OnlineSource(ModelSQL, ModelView): class OnlineSource(ModelSQL, ModelView):
'Online Source' 'Online Source'
__name__ = 'investment.source' __name__ = 'investment.source'
@ -21,14 +45,20 @@ class OnlineSource(ModelSQL, ModelView):
help='Removes HTML tags before the text is interpreted.') help='Removes HTML tags before the text is interpreted.')
rgxdate = fields.Char(string='Date', required=True, rgxdate = fields.Char(string='Date', required=True,
help='Regex code to find the date in the downloaded HTML file.') help='Regex code to find the date in the downloaded HTML file.')
rgxdatefmt = fields.Selection(string='Date format', required=True,
selection=sel_rgxdatefmt)
rgxrate = fields.Char(string='Rate', required=True, rgxrate = fields.Char(string='Rate', required=True,
help='Regex code to find the rate in the downloaded HTML file.') help='Regex code to find the rate in the downloaded HTML file.')
rgxisin = fields.Char(string='ISIN', rgxdecimal = fields.Selection(string='Decimal Separator', required=True,
help='Regex code to find the ISIN in the downloaded HTML file.') help='Decimal separator for converting the market value into a number.',
rgxnsin = fields.Char(string='NSIN', selection=sel_rgxdecimal)
help='Regex code to find the NSIN in the downloaded HTML file.') rgxident = fields.Char(string='Identifier',
rgxsymbol = fields.Char(string='Symbol', help='Regex code to find the identifier in the downloaded HTML file.')
help='Regex code to find the symbol in the downloaded HTML file.') rgxidtype = fields.Selection(string='ID-Type', selection=sel_rgxidtype,
help='Type of identifier used to validate the result.',
states={
'required': Bool(Eval('rgxident', '')),
}, depends=['rgxident'])
# field to test requests # field to test requests
used_url = fields.Function(fields.Char(string='Used URL', readonly=True, used_url = fields.Function(fields.Char(string='Used URL', readonly=True,
@ -40,57 +70,84 @@ class OnlineSource(ModelSQL, ModelView):
'on_change_with_isin', setter='set_test_value') 'on_change_with_isin', setter='set_test_value')
symbol = fields.Function(fields.Char(string='Symbol'), symbol = fields.Function(fields.Char(string='Symbol'),
'on_change_with_symbol', setter='set_test_value') 'on_change_with_symbol', setter='set_test_value')
http_state = fields.Function(fields.Char(string='HTTP-State',
readonly=True), 'on_change_with_http_state')
text = fields.Function(fields.Text(string='Result', text = fields.Function(fields.Text(string='Result',
readonly=True), 'on_change_with_text') readonly=True), 'on_change_with_text')
@classmethod
def default_url(cls):
""" defaul-url
"""
return 'https://'
@classmethod @classmethod
def default_rgxdate(cls): def default_rgxdate(cls):
""" code to find date: dd.mm.yyyy """ code to find date: dd.mm.yyyy
""" """
return '(\\d{2}\\.\\d{2}\\.\\d{4})' return '(\\d{2}\\.\\d{2}\\.\\d{4})'
@classmethod
def default_rgxdatefmt(cls):
""" dd.mm.yyyy
"""
return '%d.%m.%Y'
@classmethod
def default_rgxrate(cls):
""" nn,nn
"""
return '(\\d+,\\d+)'
@classmethod
def default_rgxidtype(cls):
""" isin
"""
return 'isin'
@classmethod
def default_rgxdecimal(cls):
""" comma
"""
return ','
@classmethod @classmethod
def default_nohtml(cls): def default_nohtml(cls):
""" default: True """ default: True
""" """
return True return True
@fields.depends(*fields_check)
def on_change_nsin(self):
    """ the test value 'nsin' was edited: query the online source
    """
    self.call_online_source()
@fields.depends(*fields_check)
def on_change_isin(self):
    """ the test value 'isin' was edited: query the online source
    """
    self.call_online_source()
@fields.depends(*fields_check)
def on_change_symbol(self):
    """ the test value 'symbol' was edited: query the online source
    """
    self.call_online_source()
def on_change_with_http_state(self, name=None):
    """ getter of the function field 'http_state': always returns an
        empty string, the real value is assigned by call_online_source()

        :param name: field name, not used (default aligned with the
            other on_change_with_* getters, which use None)
    """
    return ''
def on_change_with_text(self, name=None):
    """ getter of the function field 'text': always an empty string
    """
    empty = ''
    return empty
def on_change_with_nsin(self, name=None):
    """ always an empty string
        (presumably the getter of the function field 'nsin' - its
        declaration is outside the visible part of the file)
    """
    empty = ''
    return empty
def on_change_with_isin(self, name=None):
    """ getter of the function field 'isin': always an empty string
    """
    empty = ''
    return empty
def on_change_with_symbol(self, name=None):
    """ getter of the function field 'symbol': always an empty string
    """
    empty = ''
    return empty
@fields.depends('url', 'isin', 'nsin', 'symbol') @fields.depends('url', 'isin', 'nsin', 'symbol')
@ -121,13 +178,15 @@ class OnlineSource(ModelSQL, ModelView):
isin = self.isin, isin = self.isin,
nsin = self.nsin, nsin = self.nsin,
symbol = self.symbol, symbol = self.symbol,
debug=True, debug = True,
) )
self.text = result.get('text', None) self.text = result.get('text', None)
self.http_state = result.get('http_state', None)
def get_url_with_parameter(self, isin=None, nsin=None, symbol=None): def get_url_with_parameter(self, isin=None, nsin=None, symbol=None):
""" generate url """ generate url
""" """
if self.url:
return Template(self.url).substitute({ return Template(self.url).substitute({
'isin': isin if isin is not None else '', 'isin': isin if isin is not None else '',
'nsin': nsin if nsin is not None else '', 'nsin': nsin if nsin is not None else '',
@ -140,34 +199,38 @@ class OnlineSource(ModelSQL, ModelView):
""" """
if asset.updtsource is None: if asset.updtsource is None:
return return
rate_data = cls.read_from_website(asset.updtsource) rate_data = cls.read_from_website(asset.updtsource)
def get_regex_result(self, html_text, field_name):
    """ run the regex stored in field 'field_name' on html-text and
        convert the result

        :param html_text: text to be searched
        :param field_name: name of the attribute of this record that holds
            the regex code; 'rgxrate' converts the match to Decimal,
            'rgxdate' converts it to a date, any other name returns the
            matched string unchanged
        :return: converted value or None if the regex code is empty,
            nothing was found or the match could not be converted
        :raises re.error: if the stored regex code is not valid
    """
    # local import: only this method needs the exception class
    from decimal import InvalidOperation

    rgxcode = getattr(self, field_name) or ''
    if len(rgxcode) == 0:
        return None

    search_result = re.compile(rgxcode).search(html_text)
    if search_result is None:
        return None

    # prefer the first capture group, use the whole match if the
    # regex code defines no group
    try:
        result = search_result.group(1)
    except IndexError:
        result = search_result.group(0)
    if result is None:
        # group 1 exists but is optional and did not take part in the match
        return None

    if field_name == 'rgxrate':
        # the separator that is not the decimal separator is treated as
        # thousands separator and dropped
        group_sep = {',': '.', '.': ','}.get(self.rgxdecimal)
        if group_sep is None:
            # decimal separator not set or unknown: no safe conversion
            return None
        number = result.replace(group_sep, '').replace(self.rgxdecimal, '.')
        try:
            return Decimal(number)
        except InvalidOperation:
            return None
    elif field_name == 'rgxdate':
        try:
            return datetime.strptime(result, self.rgxdatefmt).date()
        except (ValueError, TypeError):
            # text does not fit the format / format not set
            return None
    return result
@classmethod @classmethod
@ -176,6 +239,10 @@ class OnlineSource(ModelSQL, ModelView):
""" """
result = {} result = {}
if updtsource.url == 'https://':
result['text'] = 'invalid url'
return result
res1 = requests.get( res1 = requests.get(
updtsource.get_url_with_parameter( updtsource.get_url_with_parameter(
isin = isin, isin = isin,
@ -185,24 +252,31 @@ class OnlineSource(ModelSQL, ModelView):
allow_redirects=True, allow_redirects=True,
timeout=5.0) timeout=5.0)
result['http_state'] = '%(code)d: %(msg)s' % {
'code': res1.status_code,
'msg': res1.reason,
}
if res1.status_code in [200, 204]: if res1.status_code in [200, 204]:
html = cls.cleanup_spaces(res1.text) html = res1.text
# remove html-tags # remove html-tags
if updtsource.nohtml: if updtsource.nohtml:
o1 = html2text.HTML2Text() o1 = html2text.HTML2Text()
o1.ignore_links = True o1.ignore_links = True
o1.ignore_tables = True
o1.bypass_tables = False
o1.single_line_break = True
o1.body_width = 0
html = o1.handle(html) html = o1.handle(html)
del o1 del o1
if debug: if debug:
result['text'] = html result['text'] = html
result['rate'] = cls.get_regex_result(html, updtsource.rgxrate) result['rate'] = updtsource.get_regex_result(html, 'rgxrate')
result['date'] = cls.get_regex_result(html, updtsource.rgxdate) result['date'] = updtsource.get_regex_result(html, 'rgxdate')
result['isin'] = cls.get_regex_result(html, updtsource.rgxisin) result['code'] = updtsource.get_regex_result(html, 'rgxcode')
result['nsin'] = cls.get_regex_result(html, updtsource.rgxnsin)
result['symbol'] = cls.get_regex_result(html, updtsource.rgxsymbol)
print('\n## result:', result) print('\n## result:', result)
else : else :

View file

@ -6,12 +6,14 @@ import unittest
from trytond.modules.investment.tests.test_asset import AssetTestCase from trytond.modules.investment.tests.test_asset import AssetTestCase
from trytond.modules.investment.tests.test_rate import RateTestCase from trytond.modules.investment.tests.test_rate import RateTestCase
from trytond.modules.investment.tests.test_source import SourceTestCase
__all__ = ['suite'] __all__ = ['suite']
class InvestmentTestCase(\ class InvestmentTestCase(\
SourceTestCase, \
RateTestCase,\ RateTestCase,\
AssetTestCase,\ AssetTestCase,\
): ):

56
tests/test_source.py Normal file
View file

@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
# This file is part of the investment-module from m-ds for Tryton.
# The COPYRIGHT file at the top level of this repository contains the
# full copyright notices and license terms.
from trytond.tests.test_tryton import ModuleTestCase, with_transaction
from trytond.pool import Pool
from trytond.modules.company.tests import create_company
from trytond.transaction import Transaction
from decimal import Decimal
from datetime import time, date
class SourceTestCase(ModuleTestCase):
    'Test online source module'
    module = 'investment'

    @with_transaction()
    def test_waitlist_source_check_regex(self):
        """ create a source, then check that get_regex_result() converts
            the matched date and rate
        """
        OSource = Pool().get('investment.source')

        source_values = {
            'name': 'Source 1',
            'rgxdate': 'Course Date (\\d+.\\d+.\\d+) Today',
            'rgxdatefmt': '%d.%m.%Y',
            'rgxrate': 'High (\\d+,\\d+) EUR',
            'rgxdecimal': ',',
            }
        osource, = OSource.create([source_values])
        self.assertEqual(osource.rec_name, 'Source 1')

        # date: dd.mm.yyyy, rate with decimal comma
        found_date = osource.get_regex_result(
            'The Course Date 14.03.2022 Today, High 13,43 EUR',
            'rgxdate')
        self.assertEqual(found_date, date(2022, 3, 14))

        found_rate = osource.get_regex_result(
            'The Course Date 14.03.2022 Today, High 13,43 EUR',
            'rgxrate')
        self.assertEqual(found_rate, Decimal('13.43'))

        # switch the source to iso-date
        OSource.write([osource], {
            'rgxdate': 'Course Date (\\d+-\\d+-\\d+) Today',
            'rgxdatefmt': '%Y-%m-%d',
            })
        found_date = osource.get_regex_result(
            'The Course Date 2022-03-14 Today, High 13,43 EUR',
            'rgxdate')
        self.assertEqual(found_date, date(2022, 3, 14))

# end SourceTestCase

View file

@ -5,32 +5,36 @@ full copyright notices and license terms. -->
<form col="6"> <form col="6">
<label name="name"/> <label name="name"/>
<field name="name"/> <field name="name"/>
<newline/>
<label name="url"/> <label name="url"/>
<field name="url" colspan="3"/> <field name="url" colspan="3"/>
<label name="nohtml"/> <label name="nohtml"/>
<field name="nohtml"/> <field name="nohtml"/>
<label xalign="0.0" colspan="6" id="labtempl" <label id="labtp1" string=" "/>
<label xalign="0.0" colspan="3" id="labtempl"
string="URL parameter placeholders: ${isin}, ${nsin}, ${symbol}"/> string="URL parameter placeholders: ${isin}, ${nsin}, ${symbol}"/>
<separator colspan="6" id="seprgx" string="Regular expressions to find data"/> <separator colspan="6" id="seprgx" string="Regular expressions to find data"/>
<label name="rgxdate"/> <label name="rgxdate"/>
<field name="rgxdate"/> <field name="rgxdate"/>
<label name="rgxdatefmt"/>
<field name="rgxdatefmt"/>
<label name="rgxident"/>
<field name="rgxident"/>
<label name="rgxrate"/> <label name="rgxrate"/>
<field name="rgxrate"/> <field name="rgxrate"/>
<newline/> <label name="rgxdecimal"/>
<label name="rgxisin"/> <field name="rgxdecimal"/>
<field name="rgxisin"/> <label name="rgxidtype"/>
<label name="rgxnsin"/> <field name="rgxidtype"/>
<field name="rgxnsin"/>
<label name="rgxsymbol"/>
<field name="rgxsymbol"/>
<notebook colspan="6"> <notebook colspan="6">
<page id="pgparam" col="6" string="Test parameters"> <page id="pgparam" col="6" string="Test parameters">
<label name="used_url"/> <label name="used_url"/>
<field name="used_url" colspan="5"/> <field name="used_url" colspan="3"/>
<label name="http_state"/>
<field name="http_state"/>
<label name="isin"/> <label name="isin"/>
<field name="isin"/> <field name="isin"/>