# investment/onlinesource.py

# -*- coding: utf-8 -*-
# This file is part of the investment-module from m-ds for Tryton.
# The COPYRIGHT file at the top level of this repository contains the
# full copyright notices and license terms.
from string import Template
import requests, logging, html2text, re
from trytond.model import ModelView, ModelSQL, fields, Unique, Check
from trytond.transaction import Transaction
from trytond.pool import Pool
logger = logging.getLogger(__name__)
class OnlineSource(ModelSQL, ModelView):
'Online Source'
__name__ = 'investment.source'
2022-11-18 23:21:52 +00:00
name = fields.Char(string='Name', required=True)
url = fields.Char(string='URL', required=True)
nohtml = fields.Boolean(string='Remove HTML',
help='Removes HTML tags before the text is interpreted.')
2022-11-20 19:40:51 +00:00
rgxdate = fields.Char(string='Date', required=True,
help='Regex code to find the date in the downloaded HTML file.')
rgxrate = fields.Char(string='Rate', required=True,
help='Regex code to find the drte in the downloaded HTML file.')
rgxisin = fields.Char(string='ISIN',
help='Regex code to find the ISIN in the downloaded HTML file.')
rgxnsin = fields.Char(string='NSIN',
help='Regex code to find the NSIN in the downloaded HTML file.')
rgxsymbol = fields.Char(string='Symbol',
help='Regex code to find the symbol in the downloaded HTML file.')
2022-11-18 23:21:52 +00:00
# field to test requests
2022-11-20 19:40:51 +00:00
used_url = fields.Function(fields.Char(string='Used URL', readonly=True,
help='This URL is used to retrieve the HTML file.'),
'on_change_with_used_url')
2022-11-18 23:21:52 +00:00
nsin = fields.Function(fields.Char(string='NSIN'),
'on_change_with_nsin', setter='set_test_value')
isin = fields.Function(fields.Char(string='ISIN'),
'on_change_with_isin', setter='set_test_value')
symbol = fields.Function(fields.Char(string='Symbol'),
'on_change_with_symbol', setter='set_test_value')
text = fields.Function(fields.Text(string='Result',
readonly=True), 'on_change_with_text')
2022-11-20 19:40:51 +00:00
@classmethod
def default_rgxdate(cls):
""" code to find date: dd.mm.yyyy
2022-11-18 23:21:52 +00:00
"""
2022-11-20 19:40:51 +00:00
return '(\d{2}\.\d{2}\.\d{4})'
2022-11-18 23:21:52 +00:00
@classmethod
def default_nohtml(cls):
""" default: True
"""
return True
2022-11-20 19:40:51 +00:00
def call_online_source(self):
""" use updated values to call online-source
"""
OSourc = Pool().get('investment.source')
result = OSourc.read_from_website(self, debug=False)
self.text = result.get('text', None)
2022-11-18 23:21:52 +00:00
@fields.depends('nsin', 'isin', 'symbol', 'text')
def on_change_nsin(self):
""" run request
"""
self.call_online_source()
@fields.depends('nsin', 'isin', 'symbol', 'text')
def on_change_isin(self):
""" run request
"""
self.call_online_source()
@fields.depends('nsin', 'isin', 'symbol', 'text')
def on_change_symbol(self):
""" run request
"""
self.call_online_source()
def on_change_with_text(self, name=None):
""" return existing value
"""
return ''
def on_change_with_nsin(self, name=None):
""" return existing value
"""
return ''
def on_change_with_isin(self, name=None):
""" return existing value
"""
return ''
def on_change_with_symbol(self, name=None):
""" return existing value
"""
return ''
2022-11-20 19:40:51 +00:00
def get_url_with_parameter(self, isin=None, nsin=None, symbol=None):
""" generate url
"""
return Template(self.url).substitute({
'isin': isin if isin is not None else '',
'nsin': nsin if nsin is not None else '',
'symbol': symbol if symbol is not None else '',
})
@fields.depends('url', 'isin', 'nsin', 'symbol')
def on_change_with_used_url(self, name=None):
""" get url for testing
"""
if self.url:
return self.get_url_with_parameter(
isin = self.isin,
nsin = self.nsin,
symbol = self.symbol,
)
2022-11-18 23:21:52 +00:00
@classmethod
def set_test_value(cls, record, name, value):
""" dont store it
"""
pass
@classmethod
def update_rate(cls, asset):
""" read data from inet
"""
if asset.updtsource is None:
return
rate_data = cls.read_from_website(asset.updtsource)
@classmethod
def cleanup_spaces(cls, text):
""" remove multiple spaces
"""
len1 = -1
while len1 != len(text):
len1 = len(text)
text = text.replace('\t', ' ').replace(' ', ' ')
text = text.replace('\n\r', '\n').replace('\n\n', '\n')
return text
2022-11-20 19:40:51 +00:00
@classmethod
def get_regex_result(cls, html_text, rgxcode):
""" run regex on html-text
"""
print('\n## get_regex_result:', rgxcode, type(rgxcode))
rgxcode = rgxcode or ''
if len(rgxcode) == 0:
return None
result = re.compile(rgxcode).match(html_text)
if result is None:
return None
print('-- result:', result, result.group())
return result
2022-11-18 23:21:52 +00:00
@classmethod
def read_from_website(cls, updtsource, debug=False):
""" read from url, extract values
"""
result = {}
2022-11-20 19:40:51 +00:00
res1 = requests.get(
updtsource.url,
allow_redirects=True,
timeout=5.0)
print('-- res1:', res1.history)
if res1.status_code in [200, 204, 404]:
2022-11-18 23:21:52 +00:00
html = cls.cleanup_spaces(res1.text)
# remove html
if updtsource.nohtml:
o1 = html2text.HTML2Text()
o1.ignore_links = True
html = o1.handle(html)
del o1
if debug:
result['text'] = html
2022-11-20 19:40:51 +00:00
result['rate'] = cls.get_regex_result(html, updtsource.rgxrate)
result['date'] = cls.get_regex_result(html, updtsource.rgxdate)
result['isin'] = cls.get_regex_result(html, updtsource.rgxisin)
result['nsin'] = cls.get_regex_result(html, updtsource.rgxnsin)
result['symbol'] = cls.get_regex_result(html, updtsource.rgxsymbol)
print('\n## result:', result)
2022-11-18 23:21:52 +00:00
else :
2022-11-20 19:40:51 +00:00
#logger.error('read_from_website: %s' % res1.text)
2022-11-18 23:21:52 +00:00
if debug:
result['text'] = res1.text
return result
# end OnlineSource