-
Notifications
You must be signed in to change notification settings - Fork 6
/
psmetadata.py
executable file
·153 lines (128 loc) · 4.19 KB
/
psmetadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#
# Displays all the metadata contained in a NeXML file.
#
import dendropy
from pprint import pprint
from StringIO import StringIO
import argparse
import sys
import os
# Command-line interface: one required input path, two display switches and
# an optional output path for a ladderized copy of the input.
argparser = argparse.ArgumentParser(
    description='Display all the metadata contained in a file that PhyloStyloTastic can see or render'
)
argparser.add_argument(
    'source',
    nargs=1,
    help='Input NeXML file to process'
)
argparser.add_argument(
    '--all', '-a',
    action='store_true',
    dest='flag_all',
    help='Display every annotation'
)
argparser.add_argument(
    '--fullnames', '-fn',
    action='store_true',
    dest='flag_fullnames',
    help='Display full(y resolved) names for properties'
)
# The option value is the output filename, so it is required whenever the
# option is given.  With nargs='?' (and no const) a bare "--ladderize" parsed
# to None and the request was silently ignored; argparse now reports the
# missing filename instead.
argparser.add_argument(
    '--ladderize', '-l',
    metavar='OUTPUT_FILE',
    help='Write out a left-ladderized version of this NeXML file'
)
args = argparser.parse_args()
# Read the NeXML document named on the command line.  'source' is declared
# with nargs=1, so argparse hands it over as a one-element list.
nexml_path = args.source[0]
ds = dendropy.DataSet.get_from_path(nexml_path, 'nexml')
ds.attach_taxon_set()
#
# SECTION 1: Look for Meta nodes (http://www.nexml.org/nexml/html/doc/schema-1/meta/annotations/#Meta)
# and display them.
#
# metadata maps a node label to the list of annotations found on that node;
# metadata_order remembers the labels in first-seen order.
metadata_order = []
metadata = {}


def search_for_meta_nodes(node):
    """Collect the annotations attached to ``node`` into ``metadata``.

    The node is labelled "<class name> <oid>".  Every annotation in
    ``node.annotations`` is appended to the list stored under that label;
    the label is added to ``metadata_order`` the first time an annotation
    is stored for it.  Nodes without annotations leave both containers
    untouched.
    """
    label = str(node.__class__.__name__) + " " + node.oid
    for found in node.annotations:
        if label not in metadata:
            # First annotation for this label: register it and start its list.
            metadata_order.append(label)
            metadata[label] = []
        metadata[label].append(found)
# Gather annotations, in this order, from:
#  1. the data set itself,
search_for_meta_nodes(ds)
#  2. every taxon in every taxon set,
for otu_set in ds.taxon_sets:
    for otu in otu_set:
        search_for_meta_nodes(otu)
#  3. every tree, followed immediately by each of that tree's nodes.
for trees in ds.tree_lists:
    for current_tree in trees:
        search_for_meta_nodes(current_tree)
        for tree_node in current_tree.nodes():
            search_for_meta_nodes(tree_node)
# Tallies that feed the summary:
#   property_values[name][value]   -> number of times that value was seen
#   property_types[name][proptype] -> number of times that type hint was seen
property_values = {}
property_types = {}


def record_property(name, value, proptype):
    """Count one occurrence of ``value`` (and ``proptype``) for ``name``.

    ``property_values[name][value]`` is incremented, creating entries as
    needed.  ``property_types[name]`` is always created; its counter for
    ``proptype`` is incremented only when ``proptype`` is not None.
    """
    value_counts = property_values.setdefault(name, {})
    value_counts[value] = value_counts.get(value, 0) + 1

    type_counts = property_types.setdefault(name, {})
    if proptype is not None:
        type_counts[proptype] = type_counts.get(proptype, 0) + 1
# Walk the collected annotations in first-seen order, tallying each one and,
# when --all was given, echoing it under its owning node.
show_all = args.flag_all
use_full_names = args.flag_fullnames
for owner_id in metadata_order:
    if show_all:
        print(" - " + owner_id)
    for annotation in metadata[owner_id]:
        prefixed = annotation.prefixed_name
        # --fullnames swaps the prefixed name for namespace + local name.
        shown_name = (annotation.namespace + annotation.name) if use_full_names else prefixed
        value, hint = annotation.value, annotation.datatype_hint
        record_property(shown_name, value, hint)
        if show_all:
            print(" - %s: %s (%s)" % (shown_name, value, hint))
    if show_all:
        print("")
# Display summary: for every property, its type hints and its values ordered
# from most to least frequent.  The first ten values are listed; if there are
# more, the values between the tenth and the last are collapsed into a single
# "..." line, and the last (least frequent) value is always shown.
print("Metadata summary:")
for pname in property_values:
    value_counts = property_values[pname]
    # Fall back to a placeholder when no annotation supplied a type hint.
    type_names = sorted(property_types[pname]) or ['no type information']
    values = sorted(value_counts, key=value_counts.get, reverse=True)
    print(" - " + pname + " (" + ', '.join(type_names) + "): ")
    skipped_unique = 0
    skipped_entries = 0
    total = len(values)
    for position, value in enumerate(values, 1):
        display_value = value
        if display_value is None or display_value.strip() == "":
            display_value = "(blank)"
        line = " - " + display_value + " [%d]" % (value_counts[value])
        if position <= 10:
            print(line)
        elif position == total:
            # Only mention collapsed values when some were actually skipped;
            # with exactly eleven values nothing is hidden, and the old code
            # printed a misleading "0 entries with 0 unique values" line.
            if skipped_unique:
                print(" ... (%d entries with %d unique values)" % (skipped_entries, skipped_unique))
            print(line)
        else:
            skipped_unique += 1
            skipped_entries += value_counts[value]
    print("")
# Write out a ladderized file when an output path was given via --ladderize.
if args.ladderize:
    for tree_list in ds.tree_lists:
        for tree in tree_list:
            tree.ladderize(ascending=False)
    # The context manager closes the output file even if serialization
    # raises, which the explicit open()/close() pair did not guarantee.
    with open(args.ladderize, "w") as fh_ladder:
        ds.write(fh_ladder, schema='nexml')