#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# xls_utils.py  -  Helpers for handling .xls files.
#
# Copyright (C) 2008 Jan Jockusch <jan.jockusch@perfact.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#

'''
Excel helper module to be called from external applications such as Zope

This module uses file assets for testing:
- perfact/assets/tests/test_xls.xls
- perfact/assets/tests/test_xlsx.xlsx

This library wraps usage of xlrd/xlwt for Excel 97 (.xls) files and openpyxl
for Excel 2007 (.xlsx) files.

'''

# BUILTIN
import os
import tempfile
import string
import datetime
import subprocess
import six

from io import BytesIO

from .generic import cleanup_string

# CUSTOM - fileassets, only used for testing

import xlwt
import xlrd
import openpyxl

# Sample workbook in the structure accepted by pyexcel_write_xls (the
# pyExcelerator writer format): a list of (sheet name, cells) tuples where
# cells maps (row, col) to the cell value. Here: one sheet, one column.
TEST_SHEET = [
    (
        u'Sheet1',
        {
            (rowidx, 0): label
            for rowidx, label in enumerate([u'Foo', u'Bar', u'Ham', u'Eggs'])
        },
    ),
]


def read_excel(data, filename):
    '''Parse a spreadsheet, choosing the reader from the file name.

    File names ending in "xlsx" (compared case-insensitively) are handed to
    read_workbook; every other name is handed to pyexcel_read_xls.
    The result of the chosen reader is returned unchanged.
    '''
    is_xlsx = filename.lower().endswith('xlsx')
    reader = read_workbook if is_xlsx else pyexcel_read_xls
    return reader(data)


def pyexcel_read_xls(data):
    '''Read every sheet of an Excel 97 (.xls) workbook using xlrd.

    data: the file contents as a (binary or text) string, or any other
        object, in which case its read() method is called to obtain them.

    Returns a list holding one (sheet_name, cells) tuple per sheet, where
    cells is a dictionary mapping (row, col) to the value xlrd reports for
    that cell.
    '''
    is_raw = isinstance(data, (six.binary_type, six.text_type))
    contents = data if is_raw else data.read()

    workbook = xlrd.open_workbook(file_contents=contents)
    sheets = []
    for name in workbook.sheet_names():
        sheet = workbook.sheet_by_name(name)
        cells = {}
        for rowidx in range(sheet.nrows):
            for colidx in range(sheet.ncols):
                cells[(rowidx, colidx)] = sheet.cell_value(rowidx, colidx)
        sheets.append((name, cells))

    return sheets


def read_workbook(data):
    '''Read every sheet of an Excel 2007 (.xlsx) workbook using openpyxl.

    data: the file contents as a (binary or text) string, which is wrapped
        in a BytesIO, or any other object, which is passed to openpyxl
        as is (e.g. an open binary file).

    The workbook is opened with data_only=True and read_only=True.

    Returns a list holding one (sheet_name, cells) tuple per sheet, where
    cells is a dictionary mapping zero-based (row, col) to the cell value.
    '''
    if isinstance(data, (six.binary_type, six.text_type)):
        data = BytesIO(data)

    book = openpyxl.load_workbook(data, data_only=True, read_only=True)
    # A read-only workbook has to be closed explicitly; do so even when
    # reading one of the sheets fails.
    try:
        result = []
        for sheet_name in book.sheetnames:
            sheet = book[sheet_name]
            sheet_data = {
                (rowidx, colidx): cell.value
                for rowidx, row in enumerate(sheet.iter_rows())
                for colidx, cell in enumerate(row)
            }
            result.append((sheet_name, sheet_data))
        return result
    finally:
        book.close()


def read_excel_iter(data, filename, callback, colmap=None, sheetname=None):
    """
    Walk over the rows of an XLS(X) file, invoking callback once per row.

    The reader is chosen from filename: names ending in "xlsx"
    (case-insensitive) use read_xlsx_iter, all others read_xls_iter.
    callback always receives the row index as first argument. Without
    colmap the cell values follow as positional arguments; with colmap
    (a mapping from column index to argument name of callback) they are
    passed as keyword arguments.
    If sheetname is not given, the first sheet is used.
    """
    if filename.lower().endswith('xlsx'):
        row_reader = read_xlsx_iter
    else:
        row_reader = read_xls_iter
    row_reader(data, callback, colmap, sheetname)


def read_xls_iter(data, callback, colmap=None, sheetname=None):
    """Row-wise reader for .xls data based on xlrd; cf. read_excel_iter.

    data may be the file contents as a string or an object providing
    read(). Each row of the chosen sheet (the first one when sheetname is
    None) is forwarded to callback through _read_iter_callback.
    """
    if isinstance(data, (six.binary_type, six.text_type)):
        contents = data
    else:
        contents = data.read()

    workbook = xlrd.open_workbook(file_contents=contents)
    if sheetname is None:
        sheetname = list(workbook.sheet_names())[0]
    sheet = workbook.sheet_by_name(sheetname)

    for rowidx in range(sheet.nrows):
        values = []
        for colidx in range(sheet.ncols):
            values.append(sheet.cell_value(rowidx, colidx))
        _read_iter_callback(callback, rowidx, colmap, values)


def read_xlsx_iter(data, callback, colmap=None, sheetname=None):
    """Row-wise reader for .xlsx data based on openpyxl; cf. read_excel_iter.

    data may be the file contents as a string (wrapped in a BytesIO) or
    any other object, which is passed to openpyxl as is. Each row of the
    chosen sheet (the first one when sheetname is None) is forwarded to
    callback through _read_iter_callback, with zero-based row indices.

    Exceptions raised by callback propagate to the caller; the workbook is
    closed in that case as well.
    """
    if isinstance(data, (six.binary_type, six.text_type)):
        data = BytesIO(data)

    book = openpyxl.load_workbook(data, data_only=True, read_only=True)
    # The callback is foreign code and may raise, so make sure the
    # read-only workbook is always closed.
    try:
        if sheetname is None:
            sheetname = book.sheetnames[0]
        sheet = book[sheetname]
        for rowidx, row in enumerate(sheet.iter_rows()):
            _read_iter_callback(callback, rowidx, colmap,
                                [cell.value for cell in row])
    finally:
        book.close()


def _read_iter_callback(callback, rowidx, colmap, values):
    """Invoke callback for one row, shaping the arguments by colmap.

    Without colmap the values are passed positionally after rowidx.
    With colmap (column index -> argument name) each mapped column is
    passed as keyword argument; columns at or beyond the end of values
    yield None.
    """
    if colmap is not None:
        kwargs = {}
        for colidx, argname in colmap.items():
            kwargs[argname] = values[colidx] if colidx < len(values) else None
        callback(rowidx, **kwargs)
        return

    callback(rowidx, *values)


def pyexcel_struct2table(data, sheet=0):
    '''Turn one sheet of a workbook structure into a two-dimensional list.

    data: list of (sheet_name, cells) tuples, cells mapping (row, col) to
        a value.
    sheet: index into data selecting the sheet to convert.

    The table spans rows 0..max row and columns 0..max col found in the
    keys (at least one row and one column); positions without an entry
    are filled with ''.
    '''
    cells = data[sheet][1]

    # Size of the table: largest indices used, never less than 0
    last_row = max([0] + [rowidx for rowidx, _ in cells])
    last_col = max([0] + [colidx for _, colidx in cells])

    return [
        [cells.get((rowidx, colidx), '') for colidx in range(last_col + 1)]
        for rowidx in range(last_row + 1)
    ]


def pyexcel_table2struct(data, sheetname='Sheet', encoding='utf-8'):
    '''
    Build the single-sheet structure required for xls writing from a two
    dimensional list.

    Each value ends up under its (row, col) position; bytes values are
    decoded with the given encoding first. The result is a list with one
    (sheetname, cells) tuple.
    '''
    cells = {}
    for rowidx, line in enumerate(data):
        for colidx, value in enumerate(line):
            if isinstance(value, bytes):
                value = value.decode(encoding)
            cells[(rowidx, colidx)] = value

    return [(sheetname, cells)]


def pyexcel_write_xls(data, protect=False, mode='xls'):
    '''Return a data stream holding an xls/xlsx file.

    data: list of (sheet_name, cells) tuples, cells mapping (row, col) to
        the cell value.
    protect: must be False; any other value raises AssertionError, as
        dummy protection is no longer supported.
    mode: either "xls" or "xlsx" (case-insensitive), defining the format
        specification of the output stream. "xlsx" delegates to
        write_workbook; anything else produces an xls file through
        pyexcel_write_xls_extended with the first row set in bold.
    '''
    # Raise explicitly instead of using an assert statement: asserts are
    # removed when Python runs with -O, which would silently accept
    # protect=True. The exception type is kept for existing callers.
    if protect is not False:
        raise AssertionError(
            "Dummy protection of XLS files is no longer supported")

    if mode.lower() == 'xlsx':
        return write_workbook(data)

    return pyexcel_write_xls_extended(
        [
            {
                'sheetname': sheet,
                'contents': vals,
                # bold style for every used column in the header row
                'styles': {
                    (0, col): "font: bold on"
                    for col in {key[1] for key in vals.keys()}
                }
            }
            for sheet, vals in data
        ]
    )


def pyexcel_write_xls_extended(data, max_content_length=32767):
    '''Given an array of cell descriptions, create an xls file.
    data: a list of mappings (i.e., dictionaries) with the following keys:

        sheetname (required): The name of the data sheet
        contents (required): a mapping from cell position (row,col) to content.
            Content can either be unicode (or bytes) or a list of tuples
            (part, fontdef).
            fontdef is passed to easyfont to create a font for the given part.
            For example, giving
              {(0,0): u'test', (0,1): [(u'this is ', ''),
               (u'bold', 'bold on')]}
            will set 'test' into the first cell and 'this is bold' into the
            second, with the word 'bold' being set in a bold font
        merged_cells: a list of dictionaries with the following keys:
            from:  cell position (row,col) of the first cell of the merge
            to:    cell position (row,col) of the last cell of the merge
            label: content of the merged cell, optional
            style: styling of the merged cell, optional
            Note that any cells targeted by merged_cells MUST NOT be in
            contents, as a single cell can only be written into ONCE.
        widths: a mapping from column index to width in cm
        heights: a mapping from row index to height in cm
        styles: a mapping from cell position (row,col) to a style
            description that can be used by easyxf. See example in
            https://github.com/python-excel/xlwt/blob/master/examples/xlwt_easyxf_simple_demo.py
            and the xf_dict definition in
            https://github.com/python-excel/xlwt/blob/master/xlwt/Style.py
            To get a specific font-height in pt multiply by 20
            ("font: height 180;" for 9pt)
            Example: {(3,3):
            "font: height 180, bold on, underline on; align: horiz center;"}
        horz_page_breaks: a list of row numbers where page breaks should occur
        orientation: 'portrait' (default) or 'landscape'
        fit_page: Boolean with default False. If set, the worksheet is zoomed
            so it fits onto a page
        freeze_row: a row number that marks the end of the "frozen" section of
            the sheet. Any rows before this will be sticky / scroll separately.
            This setting is optional, without it freezing will not be enabled.
        freeze_column: a column number that marks the end of the "frozen"
            section of the sheet. Any columns before this will be sticky /
            scroll separately. This setting is optional, without it freezing
            will not be enabled.
    max_content_length: Max length of strings that are allowed to fit in one
        cell; longer contents are cut. The max value is 32767. This number
        is set by Microsoft.

    Returns the bytes of the generated xls file.
    '''

    # precompute required styles, only creating one object per distinct
    # definition
    xfstyles = {}
    for sheet in data:
        if 'styles' in sheet:
            for definition in sheet['styles'].values():
                if definition in xfstyles:
                    continue
                xfstyles[definition] = xlwt.easyxf(definition)
        if 'merged_cells' in sheet:
            for merged in sheet['merged_cells']:
                if 'style' not in merged or merged['style'] in xfstyles:
                    continue
                xfstyles[merged['style']] = xlwt.easyxf(merged['style'])

    # precompute required fonts for RTF contents
    fonts = {}
    for sheet in data:
        for content in sheet['contents'].values():
            if not isinstance(content, list):
                continue
            for part in content:
                if part[1] in fonts:
                    continue
                fonts[part[1]] = xlwt.easyfont(part[1])

    wb = xlwt.Workbook()

    for sheet in data:
        ws = wb.add_sheet(sheet['sheetname'])
        if sheet.get('fit_page', False):
            ws.set_fit_num_pages(1)
        if sheet.get('orientation', 'portrait') == 'landscape':
            ws.portrait = False

        # convert cm into the units used by xlwt
        for col, w in sheet.get('widths', {}).items():
            ws.col(col).width = int(w*1310)
        for row, h in sheet.get('heights', {}).items():
            ws.row(row).height = int(h*566)

        styles = sheet.get('styles', {})
        for key, content in sheet['contents'].items():
            row, col = key
            style = xfstyles.get(styles.get(key, None), None)
            if isinstance(content, list):
                # rich text: list of (part, fontdef) tuples
                rt = []
                content_length = 0
                for part in content:
                    content_limit = max_content_length - content_length
                    if len(part[0]) > content_limit:
                        # Cut string so it fits into the cell
                        rt.append((part[0][:content_limit], fonts[part[1]]))
                        # Break loop because max size of content was reached
                        break
                    rt.append((part[0], fonts[part[1]]))
                    content_length += len(part[0])
                if style is None:
                    ws.write_rich_text(row, col, rt)
                else:
                    ws.write_rich_text(row, col, rt, style)
            else:
                if isinstance(content, six.string_types):
                    if len(content) > max_content_length:
                        content = content[:max_content_length]
                if style is None:
                    ws.write(row, col, content)
                else:
                    ws.write(row, col, content, style)

        # Setup merged cells
        merged_cells = sheet.get('merged_cells', [])
        for merged in merged_cells:
            r1, c1 = merged['from']
            r2, c2 = merged['to']
            label = merged.get('label', '')
            style = xfstyles.get(merged.get('style', None), None)
            if style is None:
                ws.write_merge(r1, r2, c1, c2, label)
            else:
                ws.write_merge(r1, r2, c1, c2, label, style)

        # Handle freezing of header areas, if configured
        if 'freeze_row' in sheet or 'freeze_column' in sheet:
            # enable freezing
            ws.set_panes_frozen(True)
            # this removes the split if the user unfreezes the sheet in excel
            ws.set_remove_splits(True)
            if 'freeze_row' in sheet:
                ws.set_horz_split_pos(sheet['freeze_row'])
            if 'freeze_column' in sheet:
                ws.set_vert_split_pos(sheet['freeze_column'])

        ws.horz_page_breaks = [
            (break_row, 0, 255)
            for break_row in sheet.get('horz_page_breaks', [])
        ]

    # xlwt can save straight into a stream, so the bytes are collected in
    # memory: no temporary directory is left behind when saving fails and
    # no shell command is needed to clean up.
    stream = BytesIO()
    wb.save(stream)
    return stream.getvalue()


def pyexcel_table2dicts(data, cols=None, encoding='utf-8'):
    '''
    Create a list of dictionaries from a two dimensional array such as
    the one created by pyexcel_struct2table.

    Every value is converted to text (bytes are decoded with the given
    encoding) and stripped. Lines consisting only of empty values are
    skipped. If cols is not given, the first non-empty line provides the
    keys. Lines shorter than cols are padded with '', surplus values
    are dropped.
    '''
    records = []
    for line in data:
        # Normalize all cells of the line to stripped text
        cells = []
        for raw in line:
            if isinstance(raw, bytes):
                text = raw.decode(encoding)
            else:
                text = six.text_type(raw)
            cells.append(text.strip())

        # Nothing but empty cells: skip the line
        if not any(cells):
            continue

        # Without column names the first non-trivial line becomes the header
        if not cols:
            cols = cells
            continue

        # Fill up short lines so that each column gets a value
        missing = len(cols) - len(cells)
        if missing > 0:
            cells.extend([''] * missing)

        records.append(dict(zip(cols, cells)))

    return records


def generic_convert(data, extension='xls', target='xlsx'):
    '''Convert files using libreoffice (through the unoconv command).

    data: bytes of the source file.
    extension: file extension describing the format of data.
    target: format name passed to unoconv via -f.

    Both extension and target are reduced to ASCII letters and digits by
    cleanup_string before use.

    Returns the bytes unoconv writes to stdout. Raises
    subprocess.CalledProcessError if unoconv exits with a non-zero status.
    '''
    # Local import: this function is the only user of shutil in the module.
    import shutil

    valid_chars = string.ascii_letters + string.digits
    extension = cleanup_string(extension, valid_chars=valid_chars)
    target = cleanup_string(target, valid_chars=valid_chars)

    tmpdir = tempfile.mkdtemp()
    try:
        # Write source file
        infile = os.path.join(tmpdir, 'in.' + extension)
        with open(infile, 'wb') as fh:
            fh.write(data)
        # Perform conversion
        return subprocess.check_output(
            ['unoconv', '--stdout', '-f', target, infile],
        )
    finally:
        # Always remove the temporary directory, also when writing or
        # converting failed; cleanup problems must not hide the real error.
        shutil.rmtree(tmpdir, ignore_errors=True)


def pyexcel_convert(data, extension='xlsx'):
    '''Convert data into Excel97 (xls) format.

    data: bytes of the source file.
    extension: file extension describing the format of data; it is
        forwarded to generic_convert (default 'xlsx').

    Returns the bytes of the converted file as produced by generic_convert.
    '''
    # Forward the caller's extension instead of a hard-coded 'xlsx', so the
    # parameter actually has an effect.
    return generic_convert(data, extension, 'xls')


def pyexcel_date_from_float(value):
    '''Render an XLS float datetime as "YYYY-MM-DD HH:MM:SS" string.

    value (anything float() accepts) is taken as number of days, including
    fractions, counted from 1899-12-30 00:00:00.
    '''
    iso_layout = '%Y-%m-%d %H:%M:%S'
    epoch = datetime.datetime.strptime('1899-12-30 00:00:00', iso_layout)
    offset = datetime.timedelta(days=float(value))
    return (epoch + offset).strftime(iso_layout)


def pyexcel_book_from_template(template_bytes, data, excel_format='xlsx'):
    '''Fill a template workbook (given as bytes!) with values and return the
    resulting workbook as bytes.

    data maps sheet names of the template to mappings from cell
    coordinates to the new cell values, for example:

    data = {
        '<SheetName>': {
            'A1': u'foobar',
            'C27': u'1234.5'
        }
    }

    The template is loaded and saved with openpyxl, which should preserve
    simple style information (color, font size, border, formulas,
    protection). No guarantee can be given for full style support or for
    the preservation of all template-sheet features, especially vba.

    excel_format is accepted but not evaluated by this function.
    '''
    # load the template from an in-memory file
    workbook = openpyxl.load_workbook(filename=BytesIO(template_bytes))

    # overwrite the requested cells sheet by sheet
    for sheetname, sheet_changes in data.items():
        for cell, value in sheet_changes.items():
            workbook[sheetname][cell].value = value

    # serialize into memory and hand back the raw bytes
    buffer = BytesIO()
    workbook.save(filename=buffer)
    return buffer.getvalue()


def write_workbook(data, colwidth=14):
    '''Build an Excel 2007 workbook (xlsx) from given data. xlsx files support
    1,048,576 rows instead of 65,536, so this should be used for lengthy
    datasets.

    data: list of (sheet_name, cells) tuples, cells mapping zero-based
        (row, col) to the cell value. The first row of each sheet is set
        in bold Arial, all other rows in regular Arial.
    colwidth: width assigned to every used column.

    When data is empty, this method will return an empty workbook with a
    single sheet.

    Returns the bytes of the xlsx file.
    '''
    wb = openpyxl.Workbook()
    font_header = openpyxl.styles.Font(name='Arial', bold=True)
    font_fields = openpyxl.styles.Font(name='Arial')

    for index, (sheet_name, sheet_data) in enumerate(data):
        # Open/Create worksheet: a new workbook already has one sheet
        if index == 0:
            ws = wb.active
            ws.title = sheet_name
        else:
            ws = wb.create_sheet(title=sheet_name)

        # iterate cell entries (openpyxl counts rows/columns from 1)
        for (row, col), value in sheet_data.items():
            cell = ws.cell(column=col + 1, row=row + 1, value=value)
            cell.font = font_header if row == 0 else font_fields

        # set column width to something sensible
        # yes this is a bit hacky, but we need to get the column letter
        for col_index, _ in enumerate(ws.columns, start=1):
            col_letter = openpyxl.utils.get_column_letter(col_index)
            ws.column_dimensions[col_letter].width = colwidth

    # Save into memory instead of a NamedTemporaryFile: re-opening such a
    # file by name while it is still open does not work on every platform,
    # and the disk round-trip is unnecessary.
    output = BytesIO()
    wb.save(output)
    # drop the workbook before copying the bytes out to save memory
    del wb
    return output.getvalue()


if __name__ == '__main__':

    # NOTE: the body of this block is a single triple-quoted string, so
    # nothing is executed when the module is run as a script. The string
    # preserves a manual smoke test: read an .xls and an .xlsx asset, write
    # each back with pyexcel_write_xls, re-read the output and compare.
    # NOTE(review): the sample mentions "./xls.py" and test_xlsx_wide.xlsx,
    # while the file header names this module xls_utils.py and the module
    # docstring lists test_xlsx.xlsx -- confirm the names before enabling.
    '''
    # This is an example of a "poor man's test suite"
    # when removing the docstrings and running the code in python,
    # the following lines will be directly executed.
    # Please notice that the code is an example and may not work
    # due to mismatches of your local system.
    # Hint: Set up a virtual environment and then run the module, eg.:
    #
    #   virtualenv -p python3 venv3
    #  . venv3/bin/activate
    #  pip install xlwt openpyxl six html2text markdown
    #  python3 ./xls.py

    print('Working')
    filename = 'perfact/assets/tests/test_xls.xls'
    with open(filename, 'rb') as testfile:
        read_res = read_excel(testfile, filename)
    print(read_res)

    outbytes = pyexcel_write_xls(read_res, mode='xls')
    with open('out.xls', 'wb') as outfile:
        outfile.write(outbytes)
    print('written')

    filename = 'out.xls'
    with open(filename, 'rb') as myfile:
        write_res = read_excel(myfile, filename)
    print(write_res)
    os.system('rm -r out.xls')
    print('Compare XLS read write: {}'.format(write_res == read_res))
    print('done XLS')

    filename = 'perfact/assets/tests/test_xlsx_wide.xlsx'
    with open(filename, 'rb') as testfile:
        read_res = read_excel(testfile, filename)
    print(read_res)

    outbytes = pyexcel_write_xls(read_res, mode='xlsx')
    with open('out.xlsx', 'wb') as outfile:
        outfile.write(outbytes)
    print('written')

    filename = 'out.xlsx'
    with open(filename, 'rb') as myfile:
        write_res = read_excel(myfile, filename)
    print(write_res)
    os.system('rm -r out.xlsx')
    print('done XLSX')

    print('Compare XLSX read write: {}'.format(write_res == read_res))
    '''
