This is a pre-production deployment of Warehouse, however changes made here WILL affect the production instance of PyPI.
Latest Version Dependencies status unknown Test status unknown Test coverage unknown
Project Description

ufal.udpipe

The ufal.udpipe is a Python binding to UDPipe library <http://ufal.mff.cuni.cz/udpipe>.

The bindings is a straightforward conversion of the C++ bindings API. In Python 2, strings can be both unicode and UTF-8 encoded str, and the library always produces unicode. In Python 3, strings must be only str.

Wrapped C++ API

The C++ API being wrapped follows. For a API reference of the original C++ API, see <http://ufal.mff.cuni.cz/udpipe/api-reference>.

Helper Structures
-----------------

  typedef vector<int> Children;

  typedef vector<string> Comments;

  class ProcessingError {
  public:
    bool occurred();
    string message;
  };

  class Word {
   public:
    int id;         // 0 is root, >0 is sentence word, <0 is undefined
    string form;    // form
    string lemma;   // lemma
    string upostag; // universal part-of-speech tag
    string xpostag; // language-specific part-of-speech tag
    string feats;   // list of morphological features
    int head;       // head, 0 is root, <0 is undefined
    string deprel;  // dependency relation to the head
    string deps;    // secondary dependencies
    string misc;    // miscellaneous information

    Children children;

    Word(int id = -1, const string& form = string());
  };
  typedef vector<Word> Words;

  class MultiwordToken {
   public:
    int idFirst, idLast;
    string form;
    string misc;

    MultiwordToken(int id_first = -1, int id_last = -1, const string& form = string(), const string& misc = string());
  };
  typedef vector<MultiwordToken> MultiwordTokens;

  class Sentence {
   public:
    Sentence();

    Words words;
    MultiwordTokens multiwordTokens;
    Comments comments
    static const string rootForm;

    bool empty();
    void clear();
    virtual Word& addWord(const char* form);
    void setHead(int id, int head, const string& deprel);
    void unlinkAllWords();
  };
  typedef vector<Sentence> Sentences;


Main Classes
------------

  class InputFormat {
   public:
    virtual void setText(const char* text);
    virtual bool nextSentence(Sentence& s, ProcessingError* error = nullptr);

    static InputFormat* newInputFormat(const string& name);
    static InputFormat* newConlluInputFormat();
    static InputFormat* newHorizontalInputFormat();
    static InputFormat* newVerticalInputFormat();
  };

  class OutputFormat {
   public:
    virtual string writeSentence(const Sentence& s) const;

    static OutputFormat* newOutputFormat(const string& name);
    static OutputFormat* newConlluOutputFormat();
    static OutputFormat* newHorizontalOutputFormat();
    static OutputFormat* newVerticalOutputFormat();
  };

  class Model {
   public:
    static Model* load(const char* fname);

    virtual InputFormat* newTokenizer(const string& options) const;
    virtual bool tag(Sentence& s, const string& options, ProcessingError* error = nullptr) const;
    virtual bool parse(Sentence& s, const string& options, ProcessingError* error) const;

    static const string DEFAULT;
  };

  class Pipeline {
   public:
    Pipeline(const Model* m, const string& input, const string& tagger, const string& parser, const string& output);

    void setModel(const Model* m);
    void setInput(const string& input);
    void setTagger(const string& tagger);
    void setParser(const string& parser);
    void setOutput(const string& output);

    string process(const string& data, ProcessingError* error = nullptr) const;

    static const string DEFAULT;
    static const string NONE;
  };

  class Trainer {
   public:

    static string train(const string& method, const Sentences& train, const Sentences& heldout,
                        const string& tokenizer, const string& tagger, const string& parser,
                        ProcessingError* error = nullptr);

    static const string DEFAULT;
    static const string NONE;
  };

  class Evaluator {
   public:
    Evaluator(const Model* m, const string& tokenizer, const string& tagger, const string& parser);

    void setModel(const Model* m);
    void setTokenizer(const string& tokenizer);
    void setTagger(const string& tagger);
    void setParser(const string& parser);

    string evaluate(const string& data, ProcessingError* error = nullptr) const;

    static const string DEFAULT;
    static const string NONE;
  };

  class Version {
   public:
    unsigned major;
    unsigned minor;
    unsigned patch;
    string prerelease;

    // Returns current version.
    static version current();
  };

Examples

run_udpipe

Simple pipeline loading data (tokenizing on request), tagging, parsing and writing to specified output format:

from ufal.udpipe import *

# In Python2, wrap sys.stdin and sys.stdout to work with unicode.
if sys.version_info[0] < 3:
    import codecs
    import locale
    encoding = locale.getpreferredencoding()
    sys.stdin = codecs.getreader(encoding)(sys.stdin)
    sys.stdout = codecs.getwriter(encoding)(sys.stdout)

if len(sys.argv) < 4:
    sys.stderr.write('Usage: %s input_format output_format model_file\n' % sys.argv[0])
    sys.exit(1)

sys.stderr.write('Loading model: ')
model = Model.load(sys.argv[3])
if not model:
    sys.stderr.write("Cannot load model from file '%s'\n" % sys.argv[3])
    sys.exit(1)
sys.stderr.write('done\n')

pipeline = Pipeline(model, sys.argv[1], Pipeline.DEFAULT, Pipeline.DEFAULT, sys.argv[2])
error = ProcessingError()

not_eof = True
while not_eof:
    text = ''

    # Read block
    while True:
        line = sys.stdin.readline()
        not_eof = bool(line)
        if not not_eof: break
        line = line.rstrip('\r\n')
        text += line
        text += '\n';
        if not line: break

    # Process data
    print "Processing %s" % text
    processed = pipeline.process(text, error)
    if error.occurred():
        sys.stderr.write("An error occurred when running run_udpipe: ")
        sys.stderr.write(error.message)
        sys.stderr.write("\n")
        sys.exit(1)
    sys.stdout.write(processed)

AUTHORS

Milan Straka <straka@ufal.mff.cuni.cz>

Release History

Release History

1.0.0.1

This version

History Node

TODO: Figure out how to actually get changelog content.

Changelog content for this version goes here.

Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

Show More

Download Files

Download Files

TODO: Brief introduction on what you do with files - including link to relevant help section.

File Name & Checksum SHA256 Checksum Help Version File Type Upload Date
ufal.udpipe-1.0.0.1.tar.gz (271.1 kB) Copy SHA256 Checksum SHA256 Source May 27, 2016

Supported By

WebFaction WebFaction Technical Writing Elastic Elastic Search Pingdom Pingdom Monitoring Dyn Dyn DNS HPE HPE Development Sentry Sentry Error Logging CloudAMQP CloudAMQP RabbitMQ Heroku Heroku PaaS Kabu Creative Kabu Creative UX & Design Fastly Fastly CDN DigiCert DigiCert EV Certificate Rackspace Rackspace Cloud Servers DreamHost DreamHost Log Hosting