abruf weiter gebaut

This commit is contained in:
Frederik Jaeckel 2022-11-20 20:40:51 +01:00
parent d54f3805ab
commit 2b80f22897
3 changed files with 174 additions and 25 deletions

View file

@ -198,6 +198,18 @@ msgctxt "view:investment.source:"
msgid "Test parameters" msgid "Test parameters"
msgstr "Testparameter" msgstr "Testparameter"
msgctxt "view:investment.source:"
msgid "URL parameter placeholders: ${isin}, ${nsin}, ${symbol}"
msgstr "Platzhalter für URL-Parameter: ${isin}, ${nsin}, ${symbol}"
msgctxt "view:investment.source:"
msgid "Regular expressions to find data"
msgstr "Reguläre Ausdrücke zum Finden der Daten"
msgctxt "view:investment.source:"
msgid "How to"
msgstr "So funktionierts"
msgctxt "view:investment.source:" msgctxt "view:investment.source:"
msgid "Configure a source for receiving course data here. The source is queried with the parameters according to schedule." msgid "Configure a source for receiving course data here. The source is queried with the parameters according to schedule."
msgstr "Konfigurieren Sie hier eine Quelle für den Empfang von Kursdaten. Die Quelle wird mit den Parametern nach Zeitplan abgefragt." msgstr "Konfigurieren Sie hier eine Quelle für den Empfang von Kursdaten. Die Quelle wird mit den Parametern nach Zeitplan abgefragt."
@ -242,6 +254,54 @@ msgctxt "help:investment.source,nohtml:"
msgid "Removes HTML tags before the text is interpreted." msgid "Removes HTML tags before the text is interpreted."
msgstr "Entfernt HTML-Tags bevor der Text interpretiert wird." msgstr "Entfernt HTML-Tags bevor der Text interpretiert wird."
msgctxt "field:investment.source,rgxdate:"
msgid "Date"
msgstr "Datum"
msgctxt "help:investment.source,rgxdate:"
msgid "Regex code to find the date in the downloaded HTML file."
msgstr "Regex-Code, um das Datum in der heruntergeladenen HTML-Datei zu finden."
msgctxt "field:investment.source,rgxrate:"
msgid "Rate"
msgstr "Kurs"
msgctxt "help:investment.source,rgxrate:"
msgid "Regex code to find the rate in the downloaded HTML file."
msgstr "Regex-Code, um den Kurs in der heruntergeladenen HTML-Datei zu finden."
msgctxt "field:investment.source,rgxisin:"
msgid "ISIN"
msgstr "ISIN"
msgctxt "help:investment.source,rgxisin:"
msgid "Regex code to find the ISIN in the downloaded HTML file."
msgstr "Regex-Code, um die ISIN in der heruntergeladenen HTML-Datei zu finden."
msgctxt "field:investment.source,rgxnsin:"
msgid "NSIN"
msgstr "WKN"
msgctxt "help:investment.source,rgxnsin:"
msgid "Regex code to find the NSIN in the downloaded HTML file."
msgstr "Regex-Code, um die WKN in der heruntergeladenen HTML-Datei zu finden."
msgctxt "field:investment.source,rgxsymbol:"
msgid "Symbol"
msgstr "Symbol"
msgctxt "help:investment.source,rgxsymbol:"
msgid "Regex code to find the symbol in the downloaded HTML file."
msgstr "Regex-Code, um das Symbol in der heruntergeladenen HTML-Datei zu finden."
msgctxt "field:investment.source,used_url:"
msgid "Used URL"
msgstr "verwendete URL"
msgctxt "help:investment.source,used_url:"
msgid "This URL is used to retrieve the HTML file."
msgstr "Diese URL wird für den Abruf der HTML-Datei verwendet."
################### ###################
# investment.rate # # investment.rate #

View file

@ -3,7 +3,8 @@
# The COPYRIGHT file at the top level of this repository contains the # The COPYRIGHT file at the top level of this repository contains the
# full copyright notices and license terms. # full copyright notices and license terms.
import requests, logging, html2text from string import Template
import requests, logging, html2text, re
from trytond.model import ModelView, ModelSQL, fields, Unique, Check from trytond.model import ModelView, ModelSQL, fields, Unique, Check
from trytond.transaction import Transaction from trytond.transaction import Transaction
from trytond.pool import Pool from trytond.pool import Pool
@ -18,8 +19,22 @@ class OnlineSource(ModelSQL, ModelView):
url = fields.Char(string='URL', required=True) url = fields.Char(string='URL', required=True)
nohtml = fields.Boolean(string='Remove HTML', nohtml = fields.Boolean(string='Remove HTML',
help='Removes HTML tags before the text is interpreted.') help='Removes HTML tags before the text is interpreted.')
# Regular expressions used to pick the values out of the downloaded page.
# Date and rate are required, ISIN, NSIN and symbol are optional.
# The help texts are message ids of the translation catalogue, so their
# wording has to match the catalogue entries exactly.
rgxdate = fields.Char(string='Date', required=True,
    help='Regex code to find the date in the downloaded HTML file.')
rgxrate = fields.Char(string='Rate', required=True,
    help='Regex code to find the rate in the downloaded HTML file.')
rgxisin = fields.Char(string='ISIN',
    help='Regex code to find the ISIN in the downloaded HTML file.')
rgxnsin = fields.Char(string='NSIN',
    help='Regex code to find the NSIN in the downloaded HTML file.')
rgxsymbol = fields.Char(string='Symbol',
    help='Regex code to find the symbol in the downloaded HTML file.')
# field to test requests # field to test requests
used_url = fields.Function(fields.Char(string='Used URL', readonly=True,
help='This URL is used to retrieve the HTML file.'),
'on_change_with_used_url')
nsin = fields.Function(fields.Char(string='NSIN'), nsin = fields.Function(fields.Char(string='NSIN'),
'on_change_with_nsin', setter='set_test_value') 'on_change_with_nsin', setter='set_test_value')
isin = fields.Function(fields.Char(string='ISIN'), isin = fields.Function(fields.Char(string='ISIN'),
@ -29,13 +44,11 @@ class OnlineSource(ModelSQL, ModelView):
text = fields.Function(fields.Text(string='Result', text = fields.Function(fields.Text(string='Result',
readonly=True), 'on_change_with_text') readonly=True), 'on_change_with_text')
def call_online_source(self): @classmethod
""" use updated values to call online-source def default_rgxdate(cls):
""" code to find date: dd.mm.yyyy
""" """
OSourc = Pool().get('investment.source') return '(\d{2}\.\d{2}\.\d{4})'
result = OSourc.read_from_website(self, debug=True)
self.text = result.get('text', None)
@classmethod @classmethod
def default_nohtml(cls): def default_nohtml(cls):
@ -43,6 +56,14 @@ class OnlineSource(ModelSQL, ModelView):
""" """
return True return True
def call_online_source(self):
    """ Query the online source with the current values of this record
        and store the returned text in 'text'.
    """
    # NOTE(review): as far as visible here, read_from_website only adds
    # the 'text' key when debug is enabled, so with debug=False this
    # presumably always stores None - confirm that this is intended.
    source_model = Pool().get('investment.source')
    response = source_model.read_from_website(self, debug=False)
    self.text = response.get('text', None)
@fields.depends('nsin', 'isin', 'symbol', 'text') @fields.depends('nsin', 'isin', 'symbol', 'text')
def on_change_nsin(self): def on_change_nsin(self):
""" run request """ run request
@ -81,6 +102,26 @@ class OnlineSource(ModelSQL, ModelView):
""" """
return '' return ''
def get_url_with_parameter(self, isin=None, nsin=None, symbol=None):
    """ Build the request URL by filling the placeholders of 'url'.

        The stored URL is used as a string.Template: the placeholders
        ${isin}, ${nsin} and ${symbol} are replaced by the given values.

        isin, nsin, symbol: replacement values, None is inserted as
            an empty string
        returns: URL with replaced placeholders, or None if no URL is set
        raises: KeyError for an unknown placeholder, ValueError for a
            malformed one (behaviour of Template.substitute)
    """
    # Without a URL there is nothing to fill in; Template(None) would
    # fail with a TypeError inside substitute().
    if self.url is None:
        return None

    values = {'isin': isin, 'nsin': nsin, 'symbol': symbol}
    return Template(self.url).substitute({
        key: value if value is not None else ''
        for key, value in values.items()})
@fields.depends('url', 'isin', 'nsin', 'symbol')
def on_change_with_used_url(self, name=None):
    """ Compute the URL for the test request from 'url' and the
        test values of isin, nsin and symbol.
    """
    if not self.url:
        # no URL entered yet, nothing to show
        return None
    return self.get_url_with_parameter(
        isin=self.isin, nsin=self.nsin, symbol=self.symbol)
@classmethod @classmethod
def set_test_value(cls, record, name, value): def set_test_value(cls, record, name, value):
""" dont store it """ dont store it
@ -107,14 +148,33 @@ class OnlineSource(ModelSQL, ModelView):
text = text.replace('\n\r', '\n').replace('\n\n', '\n') text = text.replace('\n\r', '\n').replace('\n\n', '\n')
return text return text
@classmethod
def get_regex_result(cls, html_text, rgxcode):
    """ Run a regular expression on the downloaded text.

        html_text: text to search in
        rgxcode: regex pattern; None or an empty string disables the
            search
        returns: match object of the first occurrence, or None if there
            is no pattern or no occurrence
    """
    if not rgxcode:
        return None
    # search() scans the whole text; match() would only accept a hit
    # at the very first character of the page.
    return re.compile(rgxcode).search(html_text)
@classmethod @classmethod
def read_from_website(cls, updtsource, debug=False): def read_from_website(cls, updtsource, debug=False):
""" read from url, extract values """ read from url, extract values
""" """
result = {} result = {}
res1 = requests.get(updtsource.url) res1 = requests.get(
if res1.reason == 'OK': updtsource.url,
allow_redirects=True,
timeout=5.0)
print('-- res1:', res1.history)
if res1.status_code in [200, 204, 404]:
html = cls.cleanup_spaces(res1.text) html = cls.cleanup_spaces(res1.text)
# remove html # remove html
@ -126,8 +186,16 @@ class OnlineSource(ModelSQL, ModelView):
if debug: if debug:
result['text'] = html result['text'] = html
result['rate'] = cls.get_regex_result(html, updtsource.rgxrate)
result['date'] = cls.get_regex_result(html, updtsource.rgxdate)
result['isin'] = cls.get_regex_result(html, updtsource.rgxisin)
result['nsin'] = cls.get_regex_result(html, updtsource.rgxnsin)
result['symbol'] = cls.get_regex_result(html, updtsource.rgxsymbol)
print('\n## result:', result)
else : else :
logger.error('read_from_website: %s' % res1.text) #logger.error('read_from_website: %s' % res1.text)
if debug: if debug:
result['text'] = res1.text result['text'] = res1.text
return result return result

View file

@ -11,23 +11,44 @@ full copyright notices and license terms. -->
<field name="url" colspan="3"/> <field name="url" colspan="3"/>
<label name="nohtml"/> <label name="nohtml"/>
<field name="nohtml"/> <field name="nohtml"/>
<label xalign="0.0" colspan="6" id="labtempl"
string="URL parameter placeholders: ${isin}, ${nsin}, ${symbol}"/>
<separator colspan="6" id="sepsymb" string="Test parameters"/> <separator colspan="6" id="seprgx" string="Regular expressions to find data"/>
<label colspan="6" xalign="0.0" id="lab1" <label name="rgxdate"/>
string="Configure a source for receiving course data here. The source is queried with the parameters according to schedule."/> <field name="rgxdate"/>
<label colspan="6" xalign="0.0" id="lab2" <label name="rgxrate"/>
string="Select a website where the price data, ISIN, date, etc. is contained in the loaded HTML file."/> <field name="rgxrate"/>
<label colspan="6" xalign="0.0" id="lab3" <newline/>
string="Purely javascript-based websites do not work here."/> <label name="rgxisin"/>
<field name="rgxisin"/>
<label name="rgxnsin"/>
<field name="rgxnsin"/>
<label name="rgxsymbol"/>
<field name="rgxsymbol"/>
<label name="isin"/> <notebook colspan="6">
<field name="isin"/> <page id="pgparam" col="6" string="Test parameters">
<label name="nsin"/> <label name="used_url"/>
<field name="nsin"/> <field name="used_url" colspan="5"/>
<label name="symbol"/>
<field name="symbol"/>
<separator name="text" colspan="6" string="Result"/> <label name="isin"/>
<field name="text" colspan="6" xexpand="1"/> <field name="isin"/>
<label name="nsin"/>
<field name="nsin"/>
<label name="symbol"/>
<field name="symbol"/>
<separator name="text" colspan="6" string="Result"/>
<field name="text" colspan="6" xexpand="1"/>
</page>
<page id="pginfo" col="1" string="How to">
<label xalign="0.0" id="lab1"
string="Configure a source for receiving course data here. The source is queried with the parameters according to schedule."/>
<label xalign="0.0" id="lab2"
string="Select a website where the price data, ISIN, date, etc. is contained in the loaded HTML file."/>
<label xalign="0.0" id="lab3"
string="Purely javascript-based websites do not work here."/>
</page>
</notebook>
</form> </form>