investment/onlinesource.py
2022-11-21 16:23:26 +01:00

218 lines
7 KiB
Python

# -*- coding: utf-8 -*-
# This file is part of the investment-module from m-ds for Tryton.
# The COPYRIGHT file at the top level of this repository contains the
# full copyright notices and license terms.
from string import Template
import requests, logging, html2text, re
from trytond.model import ModelView, ModelSQL, fields, Unique, Check
from trytond.transaction import Transaction
from trytond.pool import Pool
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class OnlineSource(ModelSQL, ModelView):
    'Online Source'
    # NOTE: the class docstring above is kept byte-identical on purpose;
    # presumably the framework reads it as the model's display name.
    #
    # A record describes one web page from which values are scraped:
    # a URL template (placeholders ``${isin}``, ``${nsin}``, ``${symbol}``)
    # plus one regular expression per value to extract from the page.
    __name__ = 'investment.source'

    # --- stored configuration -------------------------------------------
    name = fields.Char(string='Name', required=True)
    url = fields.Char(string='URL', required=True)
    nohtml = fields.Boolean(
        string='Remove HTML',
        help='Removes HTML tags before the text is interpreted.')
    rgxdate = fields.Char(
        string='Date', required=True,
        help='Regex code to find the date in the downloaded HTML file.')
    rgxrate = fields.Char(
        string='Rate', required=True,
        help='Regex code to find the rate in the downloaded HTML file.')
    rgxisin = fields.Char(
        string='ISIN',
        help='Regex code to find the ISIN in the downloaded HTML file.')
    rgxnsin = fields.Char(
        string='NSIN',
        help='Regex code to find the NSIN in the downloaded HTML file.')
    rgxsymbol = fields.Char(
        string='Symbol',
        help='Regex code to find the symbol in the downloaded HTML file.')

    # --- non-stored fields, only used to test a request ------------------
    used_url = fields.Function(
        fields.Char(
            string='Used URL', readonly=True,
            help='This URL is used to retrieve the HTML file.'),
        'on_change_with_used_url')
    nsin = fields.Function(
        fields.Char(string='NSIN'),
        'on_change_with_nsin', setter='set_test_value')
    isin = fields.Function(
        fields.Char(string='ISIN'),
        'on_change_with_isin', setter='set_test_value')
    symbol = fields.Function(
        fields.Char(string='Symbol'),
        'on_change_with_symbol', setter='set_test_value')
    text = fields.Function(
        fields.Text(string='Result', readonly=True),
        'on_change_with_text')

    @classmethod
    def default_rgxdate(cls):
        """ default regex to find a date formatted as dd.mm.yyyy
        """
        return '(\\d{2}\\.\\d{2}\\.\\d{4})'

    @classmethod
    def default_nohtml(cls):
        """ default: True
        """
        return True

    @fields.depends(
        'nsin', 'isin', 'symbol', 'text', methods=['call_online_source'])
    def on_change_nsin(self):
        """ NSIN test value changed: run the request again
        """
        self.call_online_source()

    @fields.depends(
        'nsin', 'isin', 'symbol', 'text', methods=['call_online_source'])
    def on_change_isin(self):
        """ ISIN test value changed: run the request again
        """
        self.call_online_source()

    @fields.depends(
        'nsin', 'isin', 'symbol', 'text', methods=['call_online_source'])
    def on_change_symbol(self):
        """ symbol test value changed: run the request again
        """
        self.call_online_source()

    def on_change_with_text(self, name=None):
        """ getter of 'text': the value is never stored,
            always returns an empty string
        """
        return ''

    def on_change_with_nsin(self, name=None):
        """ getter of 'nsin': the value is never stored,
            always returns an empty string
        """
        return ''

    def on_change_with_isin(self, name=None):
        """ getter of 'isin': the value is never stored,
            always returns an empty string
        """
        return ''

    def on_change_with_symbol(self, name=None):
        """ getter of 'symbol': the value is never stored,
            always returns an empty string
        """
        return ''

    @fields.depends('url', 'isin', 'nsin', 'symbol')
    def on_change_with_used_url(self, name=None):
        """ get the url for testing: the url template filled with
            the current test values, or None if no url is set
        """
        if not self.url:
            return None
        return self.get_url_with_parameter(
            isin=self.isin,
            nsin=self.nsin,
            symbol=self.symbol,
            )

    @classmethod
    def set_test_value(cls, record, name, value):
        """ setter of the test fields: intentionally stores nothing
        """
        pass

    # Every field read here (directly, or through get_url_with_parameter()
    # and read_from_website()) is declared, so it is available on the
    # record while an on_change handler runs.
    @fields.depends(
        'url', 'nohtml', 'rgxdate', 'rgxrate', 'rgxisin', 'rgxnsin',
        'rgxsymbol', 'isin', 'nsin', 'symbol', 'text')
    def call_online_source(self):
        """ use the current test values to call the online source
            and put the downloaded text into 'text'
        """
        if not self.url:
            # no url template yet: nothing can be requested
            return

        OSourc = Pool().get('investment.source')
        result = OSourc.read_from_website(
            self,
            isin=self.isin,
            nsin=self.nsin,
            symbol=self.symbol,
            debug=True,
            )
        self.text = result.get('text', None)

    def get_url_with_parameter(self, isin=None, nsin=None, symbol=None):
        """ generate the url: fill the placeholders 'isin', 'nsin' and
            'symbol' of the url template, None becomes an empty string

            raises KeyError/ValueError (string.Template.substitute) if
            the template has unknown or malformed placeholders
        """
        values = {'isin': isin, 'nsin': nsin, 'symbol': symbol}
        return Template(self.url).substitute({
            key: ('' if value is None else value)
            for key, value in values.items()
            })

    @classmethod
    def update_rate(cls, asset):
        """ read data from the update source of the asset;
            does nothing if the asset has no update source
        """
        if asset.updtsource is None:
            return
        # TODO: the downloaded values are not yet written to the
        # rates of the asset.
        rate_data = cls.read_from_website(asset.updtsource)

    @classmethod
    def cleanup_spaces(cls, text):
        """ replace tabs by spaces, collapse runs of spaces and runs of
            newlines into a single one, return the cleaned text
        """
        previous_length = -1
        # one pass only halves a run, repeat until nothing shrinks anymore
        while previous_length != len(text):
            previous_length = len(text)
            text = text.replace('\t', ' ').replace('  ', ' ')
            # NOTE(review): '\n\r' is kept from the original code;
            # Windows line ends are '\r\n' - confirm which was intended.
            text = text.replace('\n\r', '\n').replace('\n\n', '\n')
        return text

    @classmethod
    def get_regex_result(cls, html_text, rgxcode):
        """ run the regex 'rgxcode' on 'html_text'

            returns the match object of the first hit, or None if the
            regex is empty/None or nothing was found
        """
        if not rgxcode:
            logger.debug('get_regex_result: no regex given')
            return None

        match = re.compile(rgxcode).search(html_text)
        if match is None:
            logger.debug('get_regex_result: no match for %s', rgxcode)
            return None

        logger.debug('get_regex_result: %s -> %s', rgxcode, match.group())
        return match

    @classmethod
    def read_from_website(
            cls, updtsource, isin=None, nsin=None, symbol=None, debug=True):
        """ read from the url of 'updtsource', extract values

            updtsource: record providing url, nohtml and the rgx* fields
            isin, nsin, symbol: values for the placeholders of the url
            debug: if True, the downloaded text is added as 'text'

            returns a dict; for status 200/204 it has the keys 'rate',
            'date', 'isin', 'nsin', 'symbol' (match object or None),
            for any other status the response is logged as error and
            the keys are missing

            exceptions of requests.get() (timeout, connection errors)
            are not caught here
        """
        result = {}
        response = requests.get(
            updtsource.get_url_with_parameter(
                isin=isin,
                nsin=nsin,
                symbol=symbol,
                ),
            allow_redirects=True,
            timeout=5.0)

        if response.status_code in [200, 204]:
            html = cls.cleanup_spaces(response.text)

            # remove html-tags
            if updtsource.nohtml:
                converter = html2text.HTML2Text()
                converter.ignore_links = True
                html = converter.handle(html)

            if debug:
                result['text'] = html

            patterns = (
                ('rate', updtsource.rgxrate),
                ('date', updtsource.rgxdate),
                ('isin', updtsource.rgxisin),
                ('nsin', updtsource.rgxnsin),
                ('symbol', updtsource.rgxsymbol),
                )
            for key, rgxcode in patterns:
                result[key] = cls.get_regex_result(html, rgxcode)
            logger.debug('read_from_website: result: %s', result)
        else:
            # a single mapping as argument lets logging fill %(name)s
            logger.error(
                'read_from_website: %(code)s, url: %(url)s, redirects: [%(redirects)s]',
                {
                    'code': response.status_code,
                    'url': response.url,
                    'redirects': ', '.join(
                        [x.url for x in response.history]),
                })
            if debug:
                result['text'] = response.text
        return result
# end OnlineSource