#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys
import string
import re
import bisect

# C++ header template that main() fills in with the % operator.
# It consumes six (entry count, joined entries) pairs, in this order:
# grandfathered, redundant, language, region, variant, extlang.
# Everything inside the string is emitted verbatim, so do not edit its text
# without also checking the generated header.
HEADER = """
// This file is auto-generated header by tools/generate-subtag.py.
// So do not edit this file.
#ifndef IV_I18N_LANGUAGE_TAG_H_
#define IV_I18N_LANGUAGE_TAG_H_
#include <iv/detail/unordered_map.h>
#include <iv/detail/array.h>
#include <iv/stringpiece.h>
namespace iv {
namespace core {
namespace i18n {
namespace data {

// Irregular / regular grandfathered language tags and Preferred-Value.
// Following tags don't provide Preferred-Value in registry.
// So we define fallback tags by executing ICU canonicalizations.
//     cel-graulish cel-graulis
//     en-GB-oed    en-GB-x-oed
//     i-default    en-x-i-default
//     i-enochian   x-i-enochian
//     i-mingo      see-x-i-mingo
//     zh-min       nan-x-zh-min
typedef std::array<std::pair<StringPiece, StringPiece>, %d> GrandfatheredArray;
static const GrandfatheredArray kGrandfathered = { {
%s
} };

// Redundant tags are replaced to Preferred-Value.
// If tag don't provide Preferred-Value, we don't replace it.
typedef std::array<std::pair<StringPiece, StringPiece>, %d> RedundantArray;
static const RedundantArray kRedundant = { {
%s
} };

// Language tags with Preferred-Value.
typedef std::array<std::pair<StringPiece, StringPiece>, %d> LanguageArray;
static const LanguageArray kLanguage = { {
%s
} };

// Region tags with Preferred-Value.
typedef std::array<std::pair<StringPiece, StringPiece>, %d> RegionArray;
static const RegionArray kRegion = { {
%s
} };

// Variant tags with Preferred-Value.
typedef std::array<std::pair<StringPiece, StringPiece>, %d> VariantArray;
static const VariantArray kVariant = { {
%s
} };

// Extlang tags with Preferred-Value and Prefix.
typedef std::array<std::pair<StringPiece, std::pair<StringPiece, StringPiece> >, %d> ExtlangArray;  // NOLINT
static const ExtlangArray kExtlang = { {
%s
} };

typedef std::unordered_map<std::string, std::string> TagMap;

inline const TagMap& Grandfathered() {
  static const TagMap map(kGrandfathered.begin(), kGrandfathered.end());
  return map;
}

inline const TagMap& Redundant() {
  static const TagMap map(kRedundant.begin(), kRedundant.end());
  return map;
}

inline const TagMap& Language() {
  static const TagMap map(kLanguage.begin(), kLanguage.end());
  return map;
}

inline const TagMap& Region() {
  static const TagMap map(kRegion.begin(), kRegion.end());
  return map;
}

inline const TagMap& Variant() {
  static const TagMap map(kVariant.begin(), kVariant.end());
  return map;
}

typedef std::unordered_map<std::string, std::pair<std::string, std::string> > ExtlangMap;  // NOLINT

inline const ExtlangMap& Extlang() {
  static const ExtlangMap map(kExtlang.begin(), kExtlang.end());
  return map;
}

} } } }  // namespace iv::core::i18n::data
#endif  // IV_I18N_LANGUAGE_TAG_H_
"""

# Case-convention patterns for subtags (ASCII letters and digits only).
# They are anchored with \Z rather than $, because $ also matches just before
# a trailing newline and would let a value such as "abc\n" through.
LOWER = re.compile(r'^[a-z0-9]+\Z')
UPPER = re.compile(r'^[A-Z0-9]+\Z')
TITLE = re.compile(r'^[A-Z0-9][a-z0-9]*\Z')

def is_lower(str):
  """Return True if `str` is non-empty and consists only of a-z and 0-9."""
  return LOWER.match(str) is not None

def is_upper(str):
  """Return True if `str` is non-empty and consists only of A-Z and 0-9."""
  return UPPER.match(str) is not None

def is_title(str):
  """Return True if `str` is one of A-Z/0-9 followed only by a-z/0-9."""
  return TITLE.match(str) is not None

class DB(object):
  """Parsed records of a language subtag registry file.

  The file is read as a sequence of records separated by lines that consist
  of `%%`.  Each record is a dict built from `Field: value` lines:

  * a field that occurs once maps to its value string;
  * a field that occurs several times maps to a list of value strings;
  * a line starting with whitespace (or a line without any colon) continues
    the value of the preceding field and is joined to it with one space.

  Only records that contain a `Type` field are kept.
  """

  # The field name is everything before the FIRST colon, so that values may
  # themselves contain colons.
  PATTERN = re.compile(r'^(?P<key>[^:]+?)\s*:\s*(?P<value>.+)$')
  # Matches a `first..last` subtag range.
  RANGE = re.compile(r'(?P<first>[^\.]+)\.\.(?P<last>[^\.]+)')

  def __init__(self, source):
    """Read and parse the registry file at path `source`.

    Raises:
      IOError / OSError: if `source` cannot be opened.
    """
    self.__registry = []
    item = {}
    prev_key = None
    with open(source) as c:
      for raw_line in c:
        line = raw_line.strip()
        if not line:
          # A blank line carries no data; do not let it pad the previous
          # value with a stray trailing space.
          continue
        if line == '%%':
          # Record delimiter.
          self.validate_and_append(item)
          item = {}
          prev_key = None
          continue
        # The indentation must be inspected on the raw line: a folded line
        # may contain a colon and would otherwise look like a new field.
        is_folded = prev_key is not None and raw_line[0] in ' \t'
        m = None if is_folded else self.PATTERN.match(line)
        if m:
          prev_key = m.group('key')
          self._add_field(item, prev_key, m.group('value'))
        elif prev_key is not None:
          self._append_continuation(item, prev_key, line)
    # The last record is not followed by a delimiter.
    self.validate_and_append(item)
    self.extract_registry()

  @staticmethod
  def _add_field(item, key, value):
    """Store `value` under `key`, switching to a list on repeated keys."""
    if key not in item:
      item[key] = value
    elif isinstance(item[key], list):
      item[key].append(value)
    else:
      item[key] = [item[key], value]

  @staticmethod
  def _append_continuation(item, key, text):
    """Join folded `text` onto the most recent value stored under `key`."""
    current = item[key]
    if isinstance(current, list):
      current[-1] = current[-1] + ' ' + text
    else:
      item[key] = current + ' ' + text

  def registry(self):
    """Return the list of parsed records (dicts), in file order."""
    return self.__registry

  def validate_and_append(self, item):
    """Append `item` to the registry if it has a `Type` field."""
    if 'Type' in item:
      self.__registry.append(item)

  def extract_registry(self):
    """Post-processing hook run once after parsing; currently a no-op.

    Subtag ranges of the form `first..last` (see RANGE) are left unexpanded.
    """
    pass

def _make_pair(key, value):
  """Render one `std::make_pair("key", "value")` C++ initializer line."""
  return '  std::make_pair("%s", "%s")' % (key, value)


def _records_of_type(registry, type_name):
  """Return the registry records whose 'Type' field equals `type_name`."""
  return [item for item in registry if item['Type'] == type_name]


def _lower(text):
  """Return `text` converted to lower case."""
  return text.lower()


def _upper(text):
  """Return `text` converted to upper case."""
  return text.upper()


def _preferred_pairs(records, key_field, normalize):
  """Render a make_pair line for every record that has a Preferred-Value.

  The pair key is normalize(record[key_field]); the pair value is the
  record's Preferred-Value, emitted unchanged.
  """
  return [
      _make_pair(normalize(item[key_field]), item['Preferred-Value'])
      for item in records
      if 'Preferred-Value' in item
  ]


def main(source):
  """Generate the C++ language-tag header from a registry file.

  Parses `source` with DB, collects the Preferred-Value mappings of the
  grandfathered, redundant, language, region, variant and extlang records,
  and writes the filled-in HEADER template to standard output.  Script
  records are only validated: HEADER has no table for them.

  Args:
    source: path of the registry file to read.

  Raises:
    AssertionError: if a subtag breaks the case convention checked for its
      record type.
  """
  registry = DB(source).registry()

  # Hand-written fallbacks for grandfathered tags, used only when the
  # registry itself gives no Preferred-Value for the tag.
  # NOTE(review): RFC 5646 spells the first tag "cel-gaulish"; confirm
  # whether the spelling below is intentional before changing it.
  fallback = [
      ('cel-graulish', 'cel-graulis'),
      ('en-gb-oed', 'en-GB-x-oed'),
      ('i-default', 'en-x-i-default'),
      ('i-enochian', 'x-i-enochian'),
      ('i-mingo', 'see-x-i-mingo'),
      ('zh-min', 'nan-x-zh-min'),
  ]
  grandfathered_records = _records_of_type(registry, 'grandfathered')
  registry_tags = set(
      _lower(item['Tag'])
      for item in grandfathered_records
      if 'Preferred-Value' in item)
  # A fallback is dropped when the registry supplies its own mapping, so the
  # generated array never holds two entries for the same tag.
  grandfathered = [
      _make_pair(tag, preferred)
      for tag, preferred in fallback
      if tag not in registry_tags
  ]
  grandfathered.extend(_preferred_pairs(grandfathered_records, 'Tag', _lower))

  redundant = _preferred_pairs(
      _records_of_type(registry, 'redundant'), 'Tag', _lower)

  # Language keys are emitted in lower case.
  language = _preferred_pairs(
      _records_of_type(registry, 'language'), 'Subtag', _lower)

  # Every extlang subtag must be lower case.
  extlang_records = _records_of_type(registry, 'extlang')
  for item in extlang_records:
    assert is_lower(item['Subtag']), item['Subtag']
  extlang = [
      '  std::make_pair("%s", std::make_pair("%s", "%s"))' % (
          item['Subtag'].lower(),
          item['Preferred-Value'].lower(),
          item['Prefix'].lower())
      for item in extlang_records
      if 'Preferred-Value' in item and 'Prefix' in item
  ]

  # A script subtag that is not title case (e.g. a range) must not carry a
  # Preferred-Value.  No script table is emitted.
  for item in _records_of_type(registry, 'script'):
    if not is_title(item['Subtag']):
      assert 'Preferred-Value' not in item, item['Subtag']

  # A region subtag that is not upper case (e.g. a range) must not carry a
  # Preferred-Value.
  region_records = _records_of_type(registry, 'region')
  for item in region_records:
    if not is_upper(item['Subtag']):
      assert 'Preferred-Value' not in item, item['Subtag']
  region = _preferred_pairs(region_records, 'Subtag', _upper)

  # Every variant subtag must be lower case.
  variant_records = _records_of_type(registry, 'variant')
  for item in variant_records:
    assert is_lower(item['Subtag']), item['Subtag']
  variant = _preferred_pairs(variant_records, 'Subtag', _lower)

  # The order must match the order of the placeholders in HEADER.
  tables = (grandfathered, redundant, language, region, variant, extlang)
  values = []
  for table in tables:
    values.append(len(table))
    values.append(',\n'.join(table))

  # sys.stdout.write works the same on Python 2 and 3, unlike the print
  # statement.
  sys.stdout.write((HEADER % tuple(values)).strip() + '\n')

if __name__ == '__main__':
  # The first argument is the path of the registry file to convert; report
  # a usage error instead of crashing with IndexError when it is missing.
  if len(sys.argv) < 2:
    sys.stderr.write('usage: %s <registry-file>\n' % sys.argv[0])
    sys.exit(2)
  main(sys.argv[1])
