abruf weiter gebaut
This commit is contained in:
parent
d54f3805ab
commit
2b80f22897
3 changed files with 174 additions and 25 deletions
60
locale/de.po
60
locale/de.po
|
@ -198,6 +198,18 @@ msgctxt "view:investment.source:"
|
|||
msgid "Test parameters"
|
||||
msgstr "Testparameter"
|
||||
|
||||
msgctxt "view:investment.source:"
|
||||
msgid "URL parameter placeholders: ${isin}, ${nsin}, ${symbol}"
|
||||
msgstr "Platzhalter für URL-Parameter: ${isin}, ${nsin}, ${symbol}"
|
||||
|
||||
msgctxt "view:investment.source:"
|
||||
msgid "Regular expressions to find data"
|
||||
msgstr "Reguläre Ausdrücke zum Finden der Daten"
|
||||
|
||||
msgctxt "view:investment.source:"
|
||||
msgid "How to"
|
||||
msgstr "So funktionierts"
|
||||
|
||||
msgctxt "view:investment.source:"
|
||||
msgid "Configure a source for receiving course data here. The source is queried with the parameters according to schedule."
|
||||
msgstr "Konfigurieren Sie hier eine Quelle für den Empfang von Kursdaten. Die Quelle wird mit den Parametern nach Zeitplan abgefragt."
|
||||
|
@ -242,6 +254,54 @@ msgctxt "help:investment.source,nohtml:"
|
|||
msgid "Removes HTML tags before the text is interpreted."
|
||||
msgstr "Entfernt HTML-Tags bevor der Text interpretiert wird."
|
||||
|
||||
msgctxt "field:investment.source,rgxdate:"
|
||||
msgid "Date"
|
||||
msgstr "Datum"
|
||||
|
||||
msgctxt "help:investment.source,rgxdate:"
|
||||
msgid "Regex code to find the date in the downloaded HTML file."
|
||||
msgstr "Regex-Code, um das Datum in der heruntergeladenen HTML-Datei zu finden."
|
||||
|
||||
msgctxt "field:investment.source,rgxrate:"
|
||||
msgid "Rate"
|
||||
msgstr "Kurs"
|
||||
|
||||
msgctxt "help:investment.source,rgxrate:"
|
||||
msgid "Regex code to find the rate in the downloaded HTML file."
|
||||
msgstr "Regex-Code, um den Kurs in der heruntergeladenen HTML-Datei zu finden."
|
||||
|
||||
msgctxt "field:investment.source,rgxisin:"
|
||||
msgid "ISIN"
|
||||
msgstr "ISIN"
|
||||
|
||||
msgctxt "help:investment.source,rgxisin:"
|
||||
msgid "Regex code to find the ISIN in the downloaded HTML file."
|
||||
msgstr "Regex-Code, um die ISIN in der heruntergeladenen HTML-Datei zu finden."
|
||||
|
||||
msgctxt "field:investment.source,rgxnsin:"
|
||||
msgid "NSIN"
|
||||
msgstr "WKN"
|
||||
|
||||
msgctxt "help:investment.source,rgxnsin:"
|
||||
msgid "Regex code to find the NSIN in the downloaded HTML file."
|
||||
msgstr "Regex-Code, um die WKN in der heruntergeladenen HTML-Datei zu finden."
|
||||
|
||||
msgctxt "field:investment.source,rgxsymbol:"
|
||||
msgid "Symbol"
|
||||
msgstr "Symbol"
|
||||
|
||||
msgctxt "help:investment.source,rgxsymbol:"
|
||||
msgid "Regex code to find the Symbol in the downloaded HTML file."
|
||||
msgstr "Regex-Code, um das Symbol in der heruntergeladenen HTML-Datei zu finden."
|
||||
|
||||
msgctxt "field:investment.source,used_url:"
|
||||
msgid "Used URL"
|
||||
msgstr "verwendete URL"
|
||||
|
||||
msgctxt "help:investment.source,used_url:"
|
||||
msgid "This URL is used to retrieve the HTML file."
|
||||
msgstr "Diese URL wird für den Abruf der HTML-Datei verwendet."
|
||||
|
||||
|
||||
###################
|
||||
# investment.rate #
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
# The COPYRIGHT file at the top level of this repository contains the
|
||||
# full copyright notices and license terms.
|
||||
|
||||
import requests, logging, html2text
|
||||
from string import Template
|
||||
import requests, logging, html2text, re
|
||||
from trytond.model import ModelView, ModelSQL, fields, Unique, Check
|
||||
from trytond.transaction import Transaction
|
||||
from trytond.pool import Pool
|
||||
|
@ -18,8 +19,22 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
url = fields.Char(string='URL', required=True)
|
||||
nohtml = fields.Boolean(string='Remove HTML',
|
||||
help='Removes HTML tags before the text is interpreted.')
|
||||
# Regular expressions applied to the downloaded page. The help texts must
# match the msgid entries of the translation catalog character for
# character, otherwise the translated help is not found.
rgxdate = fields.Char(string='Date', required=True,
    help='Regex code to find the date in the downloaded HTML file.')
rgxrate = fields.Char(string='Rate', required=True,
    help='Regex code to find the rate in the downloaded HTML file.')
rgxisin = fields.Char(string='ISIN',
    help='Regex code to find the ISIN in the downloaded HTML file.')
rgxnsin = fields.Char(string='NSIN',
    help='Regex code to find the NSIN in the downloaded HTML file.')
rgxsymbol = fields.Char(string='Symbol',
    help='Regex code to find the Symbol in the downloaded HTML file.')

# field to test requests
# used_url is computed by on_change_with_used_url from the URL template and
# the test parameters.
used_url = fields.Function(fields.Char(string='Used URL', readonly=True,
    help='This URL is used to retrieve the HTML file.'),
    'on_change_with_used_url')
# nsin is a test parameter; its setter is set_test_value.
nsin = fields.Function(fields.Char(string='NSIN'),
    'on_change_with_nsin', setter='set_test_value')
|
||||
isin = fields.Function(fields.Char(string='ISIN'),
|
||||
|
@ -29,13 +44,11 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
text = fields.Function(fields.Text(string='Result',
|
||||
readonly=True), 'on_change_with_text')
|
||||
|
||||
def call_online_source(self):
|
||||
""" use updated values to call online-source
|
||||
@classmethod
def default_rgxdate(cls):
    """ Default regex code to find the date: dd.mm.yyyy

    Returns a pattern with one capturing group holding the date.
    """
    # raw string: '\d' and '\.' are invalid escape sequences in a
    # normal string literal
    return r'(\d{2}\.\d{2}\.\d{4})'
|
||||
|
||||
@classmethod
|
||||
def default_nohtml(cls):
|
||||
|
@ -43,6 +56,14 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
"""
|
||||
return True
|
||||
|
||||
def call_online_source(self):
    """ Query the online source with the current values of this record
    and put the returned text into the 'text' field.
    """
    # NOTE(review): the visible parts of read_from_website() only add the
    # 'text' key when debug is true - confirm debug=False is intended.
    source_model = Pool().get('investment.source')
    self.text = source_model.read_from_website(
        self, debug=False).get('text', None)
|
||||
|
||||
@fields.depends('nsin', 'isin', 'symbol', 'text')
|
||||
def on_change_nsin(self):
|
||||
""" run request
|
||||
|
@ -81,6 +102,26 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
"""
|
||||
return ''
|
||||
|
||||
def get_url_with_parameter(self, isin=None, nsin=None, symbol=None):
    """ Generate the request URL from the URL template of this source.

    The placeholders ${isin}, ${nsin} and ${symbol} in 'self.url' are
    replaced by the given values; a value of None becomes an empty
    string.

    Unknown placeholders and stray '$' characters are left untouched
    instead of raising KeyError/ValueError, so half-typed or
    '$'-containing URLs do not break the URL preview.
    """
    values = {'isin': isin, 'nsin': nsin, 'symbol': symbol}
    return Template(self.url).safe_substitute({
        key: '' if value is None else value
        for key, value in values.items()
        })
|
||||
|
||||
@fields.depends('url', 'isin', 'nsin', 'symbol')
def on_change_with_used_url(self, name=None):
    """ Build the URL used for testing from the URL template and the
    current test parameters; gives None while no URL is entered.
    """
    if not self.url:
        return None

    test_values = {
        'isin': self.isin,
        'nsin': self.nsin,
        'symbol': self.symbol,
        }
    return self.get_url_with_parameter(**test_values)
|
||||
|
||||
@classmethod
|
||||
def set_test_value(cls, record, name, value):
|
||||
""" dont store it
|
||||
|
@ -107,14 +148,33 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
text = text.replace('\n\r', '\n').replace('\n\n', '\n')
|
||||
return text
|
||||
|
||||
@classmethod
def get_regex_result(cls, html_text, rgxcode):
    """ Run a regular expression on the downloaded text.

    html_text: text to search in
    rgxcode: regex code; None or an empty string means 'not configured'

    Returns the match object of the first hit anywhere in the text, or
    None if there is no regex code, no text, or no hit. An invalid
    regex code raises re.error.
    """
    if not rgxcode or html_text is None:
        return None
    # search() instead of match(): the wanted value sits somewhere inside
    # the page, match() would only find it at the very first character
    return re.compile(rgxcode).search(html_text)
|
||||
|
||||
@classmethod
|
||||
def read_from_website(cls, updtsource, debug=False):
|
||||
""" read from url, extract values
|
||||
"""
|
||||
result = {}
|
||||
|
||||
res1 = requests.get(updtsource.url)
|
||||
if res1.reason == 'OK':
|
||||
res1 = requests.get(
|
||||
updtsource.url,
|
||||
allow_redirects=True,
|
||||
timeout=5.0)
|
||||
|
||||
print('-- res1:', res1.history)
|
||||
if res1.status_code in [200, 204, 404]:
|
||||
html = cls.cleanup_spaces(res1.text)
|
||||
|
||||
# remove html
|
||||
|
@ -126,8 +186,16 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
|
||||
if debug:
|
||||
result['text'] = html
|
||||
|
||||
result['rate'] = cls.get_regex_result(html, updtsource.rgxrate)
|
||||
result['date'] = cls.get_regex_result(html, updtsource.rgxdate)
|
||||
result['isin'] = cls.get_regex_result(html, updtsource.rgxisin)
|
||||
result['nsin'] = cls.get_regex_result(html, updtsource.rgxnsin)
|
||||
result['symbol'] = cls.get_regex_result(html, updtsource.rgxsymbol)
|
||||
|
||||
print('\n## result:', result)
|
||||
else :
|
||||
logger.error('read_from_website: %s' % res1.text)
|
||||
#logger.error('read_from_website: %s' % res1.text)
|
||||
if debug:
|
||||
result['text'] = res1.text
|
||||
return result
|
||||
|
|
|
@ -11,14 +11,26 @@ full copyright notices and license terms. -->
|
|||
<field name="url" colspan="3"/>
|
||||
<label name="nohtml"/>
|
||||
<field name="nohtml"/>
|
||||
<label xalign="0.0" colspan="6" id="labtempl"
|
||||
string="URL parameter placeholders: ${isin}, ${nsin}, ${symbol}"/>
|
||||
|
||||
<separator colspan="6" id="sepsymb" string="Test parameters"/>
|
||||
<label colspan="6" xalign="0.0" id="lab1"
|
||||
string="Configure a source for receiving course data here. The source is queried with the parameters according to schedule."/>
|
||||
<label colspan="6" xalign="0.0" id="lab2"
|
||||
string="Select a website where the price data, ISIN, date, etc. is contained in the loaded HTML file."/>
|
||||
<label colspan="6" xalign="0.0" id="lab3"
|
||||
string="Purely javascript-based websites do not work here."/>
|
||||
<separator colspan="6" id="seprgx" string="Regular expressions to find data"/>
|
||||
<label name="rgxdate"/>
|
||||
<field name="rgxdate"/>
|
||||
<label name="rgxrate"/>
|
||||
<field name="rgxrate"/>
|
||||
<newline/>
|
||||
<label name="rgxisin"/>
|
||||
<field name="rgxisin"/>
|
||||
<label name="rgxnsin"/>
|
||||
<field name="rgxnsin"/>
|
||||
<label name="rgxsymbol"/>
|
||||
<field name="rgxsymbol"/>
|
||||
|
||||
<notebook colspan="6">
|
||||
<page id="pgparam" col="6" string="Test parameters">
|
||||
<label name="used_url"/>
|
||||
<field name="used_url" colspan="5"/>
|
||||
|
||||
<label name="isin"/>
|
||||
<field name="isin"/>
|
||||
|
@ -29,5 +41,14 @@ full copyright notices and license terms. -->
|
|||
|
||||
<separator name="text" colspan="6" string="Result"/>
|
||||
<field name="text" colspan="6" xexpand="1"/>
|
||||
|
||||
</page>
|
||||
<page id="pginfo" col="1" string="How to">
|
||||
<label xalign="0.0" id="lab1"
|
||||
string="Configure a source for receiving course data here. The source is queried with the parameters according to schedule."/>
|
||||
<label xalign="0.0" id="lab2"
|
||||
string="Select a website where the price data, ISIN, date, etc. is contained in the loaded HTML file."/>
|
||||
<label xalign="0.0" id="lab3"
|
||||
string="Purely javascript-based websites do not work here."/>
|
||||
</page>
|
||||
</notebook>
|
||||
</form>
|
||||
|
|
Loading…
Reference in a new issue