abruf weiter gebaut
This commit is contained in:
parent
d54f3805ab
commit
2b80f22897
3 changed files with 174 additions and 25 deletions
|
@ -3,7 +3,8 @@
|
|||
# The COPYRIGHT file at the top level of this repository contains the
|
||||
# full copyright notices and license terms.
|
||||
|
||||
import requests, logging, html2text
|
||||
from string import Template
|
||||
import requests, logging, html2text, re
|
||||
from trytond.model import ModelView, ModelSQL, fields, Unique, Check
|
||||
from trytond.transaction import Transaction
|
||||
from trytond.pool import Pool
|
||||
|
@ -18,8 +19,22 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
# address of the page to download
url = fields.Char(string='URL', required=True)
# when set, HTML tags are removed before the text is interpreted
nohtml = fields.Boolean(string='Remove HTML',
    help='Removes HTML tags before the text is interpreted.')

# regex patterns applied to the downloaded text; date and rate are
# mandatory, the identifier patterns are optional
rgxdate = fields.Char(string='Date', required=True,
    help='Regex code to find the date in the downloaded HTML file.')
rgxrate = fields.Char(string='Rate', required=True,
    help='Regex code to find the rate in the downloaded HTML file.')
rgxisin = fields.Char(string='ISIN',
    help='Regex code to find the ISIN in the downloaded HTML file.')
rgxnsin = fields.Char(string='NSIN',
    help='Regex code to find the NSIN in the downloaded HTML file.')
rgxsymbol = fields.Char(string='Symbol',
    help='Regex code to find the symbol in the downloaded HTML file.')

# field to test requests
used_url = fields.Function(fields.Char(string='Used URL', readonly=True,
    help='This URL is used to retrieve the HTML file.'),
    'on_change_with_used_url')
nsin = fields.Function(fields.Char(string='NSIN'),
    'on_change_with_nsin', setter='set_test_value')
|
||||
isin = fields.Function(fields.Char(string='ISIN'),
|
||||
|
@ -29,13 +44,11 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
text = fields.Function(fields.Text(string='Result',
|
||||
readonly=True), 'on_change_with_text')
|
||||
|
||||
def call_online_source(self):
|
||||
""" use updated values to call online-source
|
||||
@classmethod
def default_rgxdate(cls):
    """ Return the regex used to find a date written as dd.mm.yyyy.

        The whole date is captured as group 1.
    """
    # raw string: '\d' and '\.' are regex escapes, not Python string
    # escapes; a plain literal triggers invalid-escape warnings
    return r'(\d{2}\.\d{2}\.\d{4})'
|
||||
|
||||
@classmethod
|
||||
def default_nohtml(cls):
|
||||
|
@ -43,6 +56,14 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
"""
|
||||
return True
|
||||
|
||||
def call_online_source(self):
    """ Query the online source with the current values of this record
        and put the 'text' entry of the answer into the field 'text'.
    """
    # NOTE(review): in the visible part of read_from_website the key
    # 'text' is only filled when debug is true, so with debug=False
    # this presumably always assigns None - confirm this is intended
    source_model = Pool().get('investment.source')
    response = source_model.read_from_website(self, debug=False)
    self.text = response.get('text')
|
||||
|
||||
@fields.depends('nsin', 'isin', 'symbol', 'text')
|
||||
def on_change_nsin(self):
|
||||
""" run request
|
||||
|
@ -81,6 +102,26 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
"""
|
||||
return ''
|
||||
|
||||
def get_url_with_parameter(self, isin=None, nsin=None, symbol=None):
    """ Build the request URL by filling the placeholders of self.url.

        self.url is treated as a string.Template; the placeholders
        $isin / ${isin}, $nsin and $symbol are replaced by the given
        values, a value of None is inserted as empty string.

        isin, nsin, symbol: values for the placeholders (optional),
        returns: the URL with all placeholders replaced, or None if
            no URL is set,
        raises: KeyError for an unknown placeholder, ValueError for a
            malformed one (e.g. a single '$'); write '$$' in the URL
            to get a literal dollar sign.
    """
    # without a template there is nothing to fill in; Template(None)
    # would fail with a TypeError inside substitute()
    if not self.url:
        return None

    values = {'isin': isin, 'nsin': nsin, 'symbol': symbol}
    return Template(self.url).substitute({
        key: value if value is not None else ''
        for key, value in values.items()
        })
|
||||
|
||||
@fields.depends('url', 'isin', 'nsin', 'symbol')
def on_change_with_used_url(self, name=None):
    """ Compute the URL shown in the field 'used_url': the URL of this
        record with the current test values for isin, nsin and symbol
        filled in. Gives None as long as no URL is entered.
    """
    if not self.url:
        return None

    test_values = {
        'isin': self.isin,
        'nsin': self.nsin,
        'symbol': self.symbol,
        }
    return self.get_url_with_parameter(**test_values)
|
||||
|
||||
@classmethod
|
||||
def set_test_value(cls, record, name, value):
|
||||
""" dont store it
|
||||
|
@ -107,14 +148,33 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
text = text.replace('\n\r', '\n').replace('\n\n', '\n')
|
||||
return text
|
||||
|
||||
@classmethod
def get_regex_result(cls, html_text, rgxcode):
    """ Look for the first occurrence of a pattern in the downloaded text.

        html_text: text to scan,
        rgxcode: regular expression; None or '' means 'not configured',
        returns: the match object of the first hit anywhere in the
            text, or None if there is no pattern, no text or no hit,
        raises: re.error if rgxcode is not a valid regular expression.
    """
    if not rgxcode or not html_text:
        return None

    # search() scans the whole text; match() would only succeed if the
    # wanted value stood at the very first character of the page
    return re.compile(rgxcode).search(html_text)
|
||||
|
||||
@classmethod
|
||||
def read_from_website(cls, updtsource, debug=False):
|
||||
""" read from url, extract values
|
||||
"""
|
||||
result = {}
|
||||
|
||||
res1 = requests.get(updtsource.url)
|
||||
if res1.reason == 'OK':
|
||||
res1 = requests.get(
|
||||
updtsource.url,
|
||||
allow_redirects=True,
|
||||
timeout=5.0)
|
||||
|
||||
print('-- res1:', res1.history)
|
||||
if res1.status_code in [200, 204, 404]:
|
||||
html = cls.cleanup_spaces(res1.text)
|
||||
|
||||
# remove html
|
||||
|
@ -126,8 +186,16 @@ class OnlineSource(ModelSQL, ModelView):
|
|||
|
||||
if debug:
|
||||
result['text'] = html
|
||||
|
||||
result['rate'] = cls.get_regex_result(html, updtsource.rgxrate)
|
||||
result['date'] = cls.get_regex_result(html, updtsource.rgxdate)
|
||||
result['isin'] = cls.get_regex_result(html, updtsource.rgxisin)
|
||||
result['nsin'] = cls.get_regex_result(html, updtsource.rgxnsin)
|
||||
result['symbol'] = cls.get_regex_result(html, updtsource.rgxsymbol)
|
||||
|
||||
print('\n## result:', result)
|
||||
else :
|
||||
logger.error('read_from_website: %s' % res1.text)
|
||||
#logger.error('read_from_website: %s' % res1.text)
|
||||
if debug:
|
||||
result['text'] = res1.text
|
||||
return result
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue