Source code for budou.budou
# -*- coding: utf-8 -*-
#
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Budou: an automatic organizer tool for beautiful line breaking in CJK
Usage:
budou [--segmenter=<seg>] [--language=<lang>] [--classname=<class>] [--inlinestyle] <source>
budou -h | --help
budou -v | --version
Options:
-h --help Show this screen.
-v --version Show version.
--segmenter=<segmenter> Segmenter to use [default: nlapi].
--language=<language> Language the source is in.
--classname=<classname> Class name for output SPAN tags. Use
comma-separated value to specify multiple classes.
--inlinestyle Add :code:`display:inline-block` as inline style
attribute.
"""
from __future__ import print_function
import sys
import warnings
from docopt import docopt
from .parser import get_parser
from .__version__ import __version__
# Names of the segmenter backends listed for this module.
# NOTE(review): nothing in the visible code reads this set -- presumably kept
# for external callers or validation elsewhere; confirm before removing.
AVAILABLE_SEGMENTERS = {'nlapi', 'mecab'}
def main():
    """Entry point of the Budou command line tool.

    Parses the command line with docopt, using the module docstring as the
    interface specification. With ``--version`` the package version is printed
    and the process exits. Otherwise the given source is parsed with the
    selected options, the resulting HTML code is printed, and the process
    exits.
    """
    options = docopt(__doc__)

    # Version request short-circuits everything else.
    if options['--version']:
        print(__version__)
        sys.exit()

    # Map command line flags onto the keyword arguments of parse().
    parse_kwargs = {
        'segmenter': options['--segmenter'],
        'language': options['--language'],
        'classname': options['--classname'],
        'inlinestyle': options['--inlinestyle'],
    }
    output = parse(options['<source>'], **parse_kwargs)
    print(output['html_code'])
    sys.exit()
def parse(source, segmenter='nlapi', language=None, max_length=None,
          classname=None, attributes=None, inlinestyle=False, **kwargs):
    """Parses the input source with the requested segmenter.

    A parser is obtained through :obj:`budou.parser.get_parser` (any extra
    keyword arguments are forwarded to it) and the source is handed to that
    parser's ``parse`` method together with the formatting options.

    Args:
      source (str): Input source to process.
      segmenter (str, optional): Segmenter to use [default: nlapi].
      language (str, optional): Language code.
      max_length (int, optional): Maximum length of a chunk.
      classname (str, optional): Class name of output SPAN tags.
      attributes (dict, optional): Attributes for output SPAN tags.
      inlinestyle (bool, optional): Add :code:`display:inline-block` as inline
          style attribute.
      **kwargs: Extra keyword arguments passed through to the parser factory.

    Returns:
      Results in a dict. :code:`chunks` holds a list of chunks
      (:obj:`budou.chunk.ChunkList`) and :code:`html_code` holds the output
      HTML code.
    """
    selected_parser = get_parser(segmenter, **kwargs)
    formatting = dict(
        language=language,
        max_length=max_length,
        classname=classname,
        attributes=attributes,
        inlinestyle=inlinestyle,
    )
    return selected_parser.parse(source, **formatting)
def authenticate(json_path=None):
    """Gets a Natural Language API parser by authenticating the API.

    **This method is deprecated.** Please use :obj:`budou.parser.get_parser` to
    obtain a parser instead. Calling it emits a :obj:`DeprecationWarning`
    before the parser is created.

    Args:
      json_path (str, optional): The file path to the service account's
          credentials. Forwarded to the parser factory as
          ``credentials_path``.

    Returns:
      Parser. (:obj:`budou.parser.NLAPIParser`)
    """
    # The message must name this function as it is actually spelled
    # (``authenticate``) so users can find the call they need to migrate.
    msg = ('budou.authenticate() is deprecated. '
           'Please use budou.parser.get_parser() to obtain a parser instead.')
    # stacklevel=2 attributes the warning to the caller's line rather than to
    # this function, so the report points at the code that needs changing.
    warnings.warn(msg, DeprecationWarning, stacklevel=2)
    return get_parser('nlapi', credentials_path=json_path)
# Run the command line tool only when this module is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()