GnuCash  5.6-150-g038405b370+
sixtp-dom-parsers.cpp
1 /********************************************************************
2  * sixtp-dom-parsers.c *
3  * Copyright 2001 Gnumatic, Inc. *
4  * *
5  * This program is free software; you can redistribute it and/or *
6  * modify it under the terms of the GNU General Public License as *
7  * published by the Free Software Foundation; either version 2 of *
8  * the License, or (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License*
16  * along with this program; if not, contact: *
17  * *
18  * Free Software Foundation Voice: +1-617-542-5942 *
19  * 51 Franklin Street, Fifth Floor Fax: +1-617-542-2652 *
20  * Boston, MA 02110-1301, USA gnu@gnu.org *
21  * *
22  ********************************************************************/
23 #include <glib.h>
24 
25 #include <config.h>
26 
27 #include <string.h>
28 
29 #include <gnc-engine.h>
30 
31 #include "gnc-xml-helper.h"
32 #include "sixtp-utils.h"
33 #include "sixtp-dom-parsers.h"
34 #include <kvp-frame.hpp>
35 
36 static QofLogModule log_module = GNC_MOD_IO;
37 
38 const char*
39 dom_node_to_text (xmlNodePtr node) noexcept
40 {
41  if (node && node->children && node->children->type == XML_TEXT_NODE
42  && !node->children->next)
43  return reinterpret_cast<const char*>(node->children->content);
44  return nullptr;
45 }
46 
47 std::optional<GncGUID>
48 dom_tree_to_guid (xmlNodePtr node)
49 {
50  auto type = xmlGetProp (node, BAD_CAST "type");
51  if (!type)
52  return {};
53 
54  bool ok = !g_strcmp0 ((char*)type, "guid") || !g_strcmp0 ((char*)type, "new");
55 
56  xmlFree (type);
57 
58  if (!ok)
59  return {};
60 
61  auto extract_guid = [](auto str) -> std::optional<GncGUID>
62  {
63  if (GncGUID guid; string_to_guid (str, &guid))
64  return guid;
65 
66  return {};
67  };
68 
69  return apply_xmlnode_text<std::optional<GncGUID>>(extract_guid, node);
70 }
71 
72 static KvpValue*
73 dom_tree_to_integer_kvp_value (xmlNodePtr node)
74 {
75  auto node_to_int_kvp = [](auto txt) -> KvpValue*
76  {
77  if (gint64 daint; string_to_gint64 (txt, &daint))
78  return new KvpValue{daint};
79 
80  return nullptr;
81  };
82  return apply_xmlnode_text<KvpValue*> (node_to_int_kvp, node, nullptr);
83 }
84 
85 template <typename T>
86 static bool
87 dom_tree_to_num (xmlNodePtr node, std::function<bool(const char*, T*)>string_to_num, T* num_ptr)
88 {
89  return apply_xmlnode_text<T>([&](auto txt){ return string_to_num (txt, num_ptr);}, node, false);
90 }
91 
92 gboolean
93 dom_tree_to_integer (xmlNodePtr node, gint64* daint)
94 {
95  return dom_tree_to_num<gint64>(node, string_to_gint64, daint);
96 }
97 
98 gboolean
99 dom_tree_to_guint16 (xmlNodePtr node, guint16* i)
100 {
101  return dom_tree_to_num<guint16>(node, string_to_guint16, i);
102 }
103 
104 gboolean
105 dom_tree_to_guint (xmlNodePtr node, guint* i)
106 {
107  return dom_tree_to_num<guint>(node, string_to_guint, i);
108 }
109 
110 gboolean
111 dom_tree_to_boolean (xmlNodePtr node, gboolean* b)
112 {
113  auto set_bool = [b](auto text) -> gboolean
114  {
115  if (g_ascii_strncasecmp (text, "true", 4) == 0)
116  {
117  *b = TRUE;
118  return TRUE;
119  }
120  else if (g_ascii_strncasecmp (text, "false", 5) == 0)
121  {
122  *b = FALSE;
123  return TRUE;
124  }
125  else
126  {
127  *b = FALSE;
128  return FALSE;
129  }
130  };
131  return apply_xmlnode_text<gboolean> (set_bool, node);
132 }
133 
134 static KvpValue*
135 dom_tree_to_double_kvp_value (xmlNodePtr node)
136 {
137  auto node_to_double_kvp = [](auto txt) -> KvpValue*
138  {
139  if (double dadoub; string_to_double (txt, &dadoub)) return new KvpValue{dadoub};
140  return nullptr;
141  };
142  return apply_xmlnode_text<KvpValue*> (node_to_double_kvp, node, nullptr);
143 }
144 
145 static KvpValue*
146 dom_tree_to_numeric_kvp_value (xmlNodePtr node)
147 {
148  return new KvpValue {dom_tree_to_gnc_numeric (node)};
149 }
150 
151 static KvpValue*
152 dom_tree_to_string_kvp_value (xmlNodePtr node)
153 {
154  auto node_to_string_kvp = [](auto txt) -> KvpValue*
155  {
156  return new KvpValue {g_strdup (txt)};
157  };
158  return apply_xmlnode_text<KvpValue*> (node_to_string_kvp, node, nullptr);
159 }
160 
161 static KvpValue*
162 dom_tree_to_guid_kvp_value (xmlNodePtr node)
163 {
164  auto daguid = dom_tree_to_guid (node);
165  return daguid ? new KvpValue {guid_copy (&*daguid)} : nullptr;
166 }
167 
168 static KvpValue*
169 dom_tree_to_time64_kvp_value (xmlNodePtr node)
170 {
171  Time64 t{dom_tree_to_time64 (node)};
172  return new KvpValue {t};
173 }
174 
175 static KvpValue*
176 dom_tree_to_gdate_kvp_value (xmlNodePtr node)
177 {
178  auto date = dom_tree_to_gdate (node);
179  if (!date) return nullptr;
180  auto rv{new KvpValue {*date}};
181  g_date_free (date);
182  return rv;
183 }
184 
185 gboolean
186 string_to_binary (const gchar* str, void** v, guint64* data_len)
187 {
188  guint64 str_len;
189  guchar* data;
190  unsigned int i, j;
191 
192  g_return_val_if_fail (v != NULL, FALSE);
193  g_return_val_if_fail (data_len != NULL, FALSE);
194 
195  str_len = strlen (str);
196 
197  /* Since no whitespace is allowed and hex encoding is 2 text chars
198  per binary char, the result must be half the input size and the
199  input size must be even. */
200  if ((str_len % 2) != 0)
201  return (FALSE);
202  *data_len = str_len / 2;
203  data = g_new0 (guchar, *data_len);
204 
205  for (j = 0, i = 0; i < str_len; i += 2, j++)
206  {
207  gchar tmpstr[3];
208  long int converted;
209 
210  tmpstr[0] = str[i];
211  tmpstr[1] = str[i + 1];
212  tmpstr[2] = '\0';
213 
214  converted = strtol (tmpstr, NULL, 16);
215 
216  data[j] = (unsigned char)converted;
217  }
218 
219  *v = data;
220 
221  return (TRUE);
222 }
223 
224 static KvpValue* dom_tree_to_kvp_value (xmlNodePtr node);
225 //needed for test access as well as internal use.
226 KvpFrame* dom_tree_to_kvp_frame (xmlNodePtr node);
227 
228 static KvpValue*
229 dom_tree_to_list_kvp_value (xmlNodePtr node)
230 {
231  GList* list = NULL;
232  xmlNodePtr mark;
233  KvpValue* ret = NULL;
234 
235  for (mark = node->xmlChildrenNode; mark; mark = mark->next)
236  {
237  KvpValue* new_val;
238 
239  if (g_strcmp0 ((char*)mark->name, "text") == 0)
240  continue;
241 
242  new_val = dom_tree_to_kvp_value (mark);
243  if (new_val)
244  {
245  list = g_list_prepend (list, (gpointer)new_val);
246  }
247  }
248 
249  list = g_list_reverse (list);
250 
251  ret = new KvpValue {list};
252 
253  return ret;
254 }
255 
256 static KvpValue*
257 dom_tree_to_frame_kvp_value (xmlNodePtr node)
258 {
259  KvpFrame* frame = dom_tree_to_kvp_frame (node);
260  return frame ? new KvpValue {frame} : nullptr;
261 }
262 
263 
265 {
266  const gchar* tag;
267  KvpValue* (*converter) (xmlNodePtr node);
268 };
269 /* Note: The type attribute must remain 'timespec' to maintain compatibility.
270  */
271 
272 struct kvp_val_converter val_converters[] =
273 {
274  { "integer", dom_tree_to_integer_kvp_value },
275  { "double", dom_tree_to_double_kvp_value },
276  { "numeric", dom_tree_to_numeric_kvp_value },
277  { "string", dom_tree_to_string_kvp_value },
278  { "guid", dom_tree_to_guid_kvp_value },
279  { "timespec", dom_tree_to_time64_kvp_value },
280  { "gdate", dom_tree_to_gdate_kvp_value },
281  { "list", dom_tree_to_list_kvp_value },
282  { "frame", dom_tree_to_frame_kvp_value },
283  { 0, 0 },
284 };
285 
286 static KvpValue*
287 dom_tree_to_kvp_value (xmlNodePtr node)
288 {
289  xmlChar* xml_type;
290  struct kvp_val_converter* mark;
291  KvpValue* ret = NULL;
292 
293  xml_type = xmlGetProp (node, BAD_CAST "type");
294 
295  for (mark = val_converters; mark->tag; mark++)
296  {
297  if (g_strcmp0 (reinterpret_cast<char*>(xml_type), mark->tag) == 0)
298  {
299  ret = (mark->converter) (node);
300  }
301  }
302 
303  if (!mark->tag)
304  {
305  /* FIXME: deal with unknown type tag here */
306  }
307 
308  xmlFree (xml_type);
309 
310  return ret;
311 }
312 
313 static gboolean
314 dom_tree_to_kvp_frame_given (xmlNodePtr node, KvpFrame* frame)
315 {
316  xmlNodePtr mark;
317 
318  g_return_val_if_fail (node, FALSE);
319  g_return_val_if_fail (frame, FALSE);
320 
321  for (mark = node->xmlChildrenNode; mark; mark = mark->next)
322  {
323  if (g_strcmp0 ((char*)mark->name, "slot") == 0)
324  {
325  xmlNodePtr mark2;
326  const gchar* key = NULL;
327  std::optional<std::string> maybe_key;
328  KvpValue* val = NULL;
329 
330  for (mark2 = mark->xmlChildrenNode; mark2; mark2 = mark2->next)
331  {
332  if (g_strcmp0 ((char*)mark2->name, "slot:key") == 0)
333  {
334  key = dom_node_to_text (mark2);
335  if (!key)
336  {
337  maybe_key = dom_tree_to_text (mark2);
338  key = maybe_key ? maybe_key->c_str() : nullptr;
339  }
340  }
341  else if (g_strcmp0 ((char*)mark2->name, "slot:value") == 0)
342  {
343  val = dom_tree_to_kvp_value (mark2);
344  }
345  else
346  {
347  /* FIXME: should put some error here.
348  * But ignore text type! */
349  }
350  }
351 
352  if (key)
353  {
354  if (val)
355  {
356  //We're deleting the old KvpValue returned by replace_nc().
357  delete frame->set ({key}, val);
358  }
359  else
360  {
361  /* FIXME: should put some error here */
362  }
363  }
364  }
365  }
366 
367  return TRUE;
368 }
369 
370 
371 KvpFrame*
372 dom_tree_to_kvp_frame (xmlNodePtr node)
373 {
374  g_return_val_if_fail (node, NULL);
375 
376  auto ret = new KvpFrame;
377 
378  if (dom_tree_to_kvp_frame_given (node, ret))
379  return ret;
380 
381  delete ret;
382  return NULL;
383 }
384 
385 gboolean
386 dom_tree_create_instance_slots (xmlNodePtr node, QofInstance* inst)
387 {
388  KvpFrame* frame = qof_instance_get_slots (inst);
389  return dom_tree_to_kvp_frame_given (node, frame);
390 }
391 
392 std::optional<std::string>
393 dom_tree_to_text (xmlNodePtr tree)
394 {
395  /* Expect *only* text and comment sibling nodes in the given tree --
396  which actually may only be a "list". i.e. if you're trying to
397  extract bar from <foo>bar</foo>, pass in <foo>->xmlChildrenNode
398  to this function. This expectation is different from the rest of
399  the dom_tree_to_* converters...
400 
401  Ignores comment nodes and collapse text nodes into one string.
402  Returns NULL if expectations are unsatisfied.
403  */
404  std::string rv;
405  gchar* temp;
406 
407  g_return_val_if_fail (tree, std::nullopt);
408 
409  /* no nodes means it's an empty string text */
410  if (!tree->xmlChildrenNode)
411  {
412  DEBUG ("No children");
413  return "";
414  }
415 
416  temp = (char*)xmlNodeListGetString (NULL, tree->xmlChildrenNode, TRUE);
417  if (!temp)
418  {
419  DEBUG ("Null string");
420  return std::nullopt;
421  }
422 
423  DEBUG ("node string [%s]", (temp == NULL ? "(null)" : temp));
424  rv = temp;
425  xmlFree (temp);
426  return rv;
427 }
428 
429 gnc_numeric
430 dom_tree_to_gnc_numeric (xmlNodePtr node)
431 {
432  auto node_to_numeric = [](auto txt)
433  {
434  gnc_numeric num = gnc_numeric_from_string(txt);
435  return gnc_numeric_check (num) ? gnc_numeric_zero() : num;
436  };
437  return apply_xmlnode_text<gnc_numeric> (node_to_numeric, node, gnc_numeric_zero());
438 }
439 
440 
441 time64
442 dom_tree_to_time64 (xmlNodePtr node)
443 {
444  /* Turn something like this
445 
446  <date-posted>
447  <ts:date>Mon, 05 Jun 2000 23:16:19 -0500</ts:date>
448  </date-posted>
449 
450  into a time64, returning INT64_MAX that we're using to flag an erroneous
451  date if there's a problem. Only one ts:date element is permitted for any
452  date attribute.
453  */
454 
455  time64 ret {INT64_MAX};
456  gboolean seen = FALSE;
457  xmlNodePtr n;
458 
459  for (n = node->xmlChildrenNode; n; n = n->next)
460  {
461  switch (n->type)
462  {
463  case XML_COMMENT_NODE:
464  case XML_TEXT_NODE:
465  break;
466  case XML_ELEMENT_NODE:
467  if (g_strcmp0 ("ts:date", (char*)n->name) == 0)
468  {
469  if (seen)
470  {
471  return INT64_MAX;
472  }
473  seen = TRUE;
474  ret = apply_xmlnode_text<time64> (gnc_iso8601_to_time64_gmt, n, INT64_MAX);
475  }
476  break;
477  default:
478  PERR ("unexpected sub-node.");
479  return INT64_MAX;
480  break;
481  }
482  }
483 
484  if (!seen)
485  {
486  PERR ("no ts:date node found.");
487  return INT64_MAX;
488  }
489 
490  return ret;
491 }
492 
493 GDate*
494 dom_tree_to_gdate (xmlNodePtr node)
495 {
496  /* Turn something like this
497 
498  <sx:startdate>
499  <gdate>2001-04-03</gdate>
500  </sx:startdate>
501 
502  into a GDate. If the xml is invalid, returns NULL. */
503 
504  GDate ret;
505  gboolean seen_date = FALSE;
506  xmlNodePtr n;
507 
508  auto try_setting_date = [&ret](const char *content) -> bool
509  {
510  gint year = 0, month = 0, day = 0;
511  if (sscanf (content, "%d-%d-%d", &year, &month, &day) != 3) return false;
512  g_date_set_dmy (&ret, day, static_cast<GDateMonth>(month), year);
513  return (g_date_valid (&ret));
514  };
515 
516  /* creates an invalid date */
517  g_date_clear (&ret, 1);
518 
519  for (n = node->xmlChildrenNode; n; n = n->next)
520  {
521  switch (n->type)
522  {
523  case XML_COMMENT_NODE:
524  case XML_TEXT_NODE:
525  break;
526  case XML_ELEMENT_NODE:
527  if (g_strcmp0 ("gdate", (char*)n->name) == 0)
528  {
529  if (seen_date || !apply_xmlnode_text<bool> (try_setting_date, n))
530  return NULL;
531  seen_date = TRUE;
532  }
533  break;
534  default:
535  PERR ("unexpected sub-node.");
536  return NULL;
537  }
538  }
539 
540  if (!seen_date)
541  {
542  PWARN ("no gdate node found.");
543  return NULL;
544  }
545 
546  return g_date_copy (&ret);
547 }
548 
/** Namespace and identifier read from a commodity reference element. */
struct CommodityRef
{
    std::string space;  /* text of the cmdty:space sub-node */
    std::string id;     /* text of the cmdty:id sub-node */
};
554 
555 std::string
556 gnc_strstrip (std::string_view sv)
557 {
558  while (!sv.empty () && g_ascii_isspace (sv.front())) sv.remove_prefix (1);
559  while (!sv.empty () && g_ascii_isspace (sv.back())) sv.remove_suffix (1);
560  return std::string (sv);
561 }
562 
563 static std::optional<CommodityRef>
564 parse_commodity_ref (xmlNodePtr node, QofBook* book)
565 {
566  /* Turn something like this
567 
568  <currency>
569  <cmdty:space>NASDAQ</cmdty:space>
570  <cmdty:id>LNUX</cmdty:space>
571  </currency>
572 
573  into a gnc_commodity*, returning NULL on failure. Both sub-nodes
574  are required, though for now, order is irrelevant. */
575 
576  CommodityRef rv;
577  bool space_set{false};
578  bool id_set{false};
579  xmlNodePtr n;
580 
581  if (!node) return {};
582  if (!node->xmlChildrenNode) return {};
583 
584  for (n = node->xmlChildrenNode; n; n = n->next)
585  {
586  switch (n->type)
587  {
588  case XML_COMMENT_NODE:
589  case XML_TEXT_NODE:
590  break;
591  case XML_ELEMENT_NODE:
592  if (g_strcmp0 ("cmdty:space", (char*)n->name) == 0)
593  {
594  if (space_set)
595  {
596  return {};
597  }
598  rv.space = apply_xmlnode_text<std::string> (gnc_strstrip, n);
599  space_set = true;
600  }
601  else if (g_strcmp0 ("cmdty:id", (char*)n->name) == 0)
602  {
603  if (id_set)
604  {
605  return {};
606  }
607  rv.id = apply_xmlnode_text<std::string> (gnc_strstrip, n);
608  id_set = true;
609  }
610  break;
611  default:
612  PERR ("unexpected sub-node.");
613  return {};
614  break;
615  }
616  }
617  if (space_set && id_set)
618  return rv;
619 
620  return {};
621 }
622 
623 gnc_commodity*
624 dom_tree_to_commodity_ref_no_engine (xmlNodePtr node, QofBook* book)
625 {
626  auto ref = parse_commodity_ref (node, book);
627 
628  if (!ref)
629  return nullptr;
630 
631  return gnc_commodity_new (book, nullptr, ref->space.c_str(), ref->id.c_str(),
632  nullptr, 0);
633 }
634 
635 gnc_commodity*
636 dom_tree_to_commodity_ref (xmlNodePtr node, QofBook* book)
637 {
638  gnc_commodity* ret;
639  gnc_commodity_table* table;
640 
641  auto ref = parse_commodity_ref (node, book);
642 
643  if (!ref)
644  return nullptr;
645 
647 
648  g_return_val_if_fail (table != NULL, NULL);
649 
650  ret = gnc_commodity_table_lookup (table, ref->space.c_str(), ref->id.c_str());
651 
652  g_return_val_if_fail (ret != NULL, NULL);
653 
654  return ret;
655 }
656 
657 /***********************************************************************/
658 /* generic parser */
659 
660 static inline void
661 dom_tree_handlers_reset (struct dom_tree_handler* handlers)
662 {
663  for (; handlers->tag != NULL; handlers++)
664  {
665  handlers->gotten = 0;
666  }
667 }
668 
669 static inline gboolean
670 dom_tree_handlers_all_gotten_p (struct dom_tree_handler* handlers)
671 {
672  gboolean ret = TRUE;
673  for (; handlers->tag != NULL; handlers++)
674  {
675  if (handlers->required && ! handlers->gotten)
676  {
677  PERR ("Not defined and it should be: %s",
678  handlers->tag ? handlers->tag : "(null)");
679  ret = FALSE;
680  }
681  }
682  return ret;
683 }
684 
685 
686 static inline gboolean
687 gnc_xml_set_data (const gchar* tag, xmlNodePtr node, gpointer item,
688  struct dom_tree_handler* handlers)
689 {
690  for (; handlers->tag != NULL; handlers++)
691  {
692  if (g_strcmp0 (tag, handlers->tag) == 0)
693  {
694  (handlers->handler) (node, item);
695  handlers->gotten = TRUE;
696  break;
697  }
698  }
699 
700  if (!handlers->tag)
701  {
702  PERR ("Unhandled tag: %s",
703  tag ? tag : "(null)");
704  return FALSE;
705  }
706 
707  return TRUE;
708 }
709 
710 gboolean
711 dom_tree_generic_parse (xmlNodePtr node, struct dom_tree_handler* handlers,
712  gpointer data)
713 {
714  xmlNodePtr achild;
715  gboolean successful = TRUE;
716 
717  dom_tree_handlers_reset (handlers);
718 
719  for (achild = node->xmlChildrenNode; achild; achild = achild->next)
720  {
721  /* ignore stray text nodes */
722  if (g_strcmp0 ((char*)achild->name, "text") == 0)
723  continue;
724 
725  if (!gnc_xml_set_data ((char*)achild->name, achild, data, handlers))
726  {
727  PERR ("gnc_xml_set_data failed");
728  successful = FALSE;
729  continue;
730  }
731  }
732 
733  if (!dom_tree_handlers_all_gotten_p (handlers))
734  {
735  PERR ("didn't find all of the expected tags in the input");
736  successful = FALSE;
737  }
738 
739  return successful;
740 }
741 
742 gboolean
743 dom_tree_valid_time64 (time64 val, const xmlChar * name)
744 {
745  if (val != INT64_MAX)
746  return TRUE;
747  g_warning ("Invalid timestamp in data file. Look for a '%s' entry "
748  "with a year outside of the valid range: 1400..10000", name);
749  return FALSE;
750 }
time64 gnc_iso8601_to_time64_gmt(const gchar *)
The gnc_iso8601_to_time64_gmt() routine converts an ISO-8601 style date/time string to time64...
gnc_commodity_table * gnc_commodity_table_get_table(QofBook *book)
Returns the commodity table associated with a book.
GncGUID * guid_copy(const GncGUID *guid)
Returns a newly allocated GncGUID that matches the passed-in GUID.
Definition: guid.cpp:155
#define DEBUG(format, args...)
Print a debugging message.
Definition: qoflog.h:264
gboolean string_to_guid(const gchar *string, GncGUID *guid)
Given a string, replace the given guid with the parsed one unless the given value is null...
#define PERR(format, args...)
Log a serious error.
Definition: qoflog.h:244
#define PWARN(format, args...)
Log a warning.
Definition: qoflog.h:250
gnc_commodity * gnc_commodity_new(QofBook *book, const char *fullname, const char *name_space, const char *mnemonic, const char *cusip, int fraction)
Create a new commodity.
All type declarations for the whole Gnucash engine.
gnc_numeric gnc_numeric_from_string(const gchar *str)
Read a gnc_numeric from str, skipping any leading whitespace.
GNCNumericErrorCode gnc_numeric_check(gnc_numeric a)
Check for error signal in value.
gint64 time64
Most systems that are currently maintained, including Microsoft Windows, BSD-derived Unixes and Linux...
Definition: gnc-date.h:87
The type used to store guids in C.
Definition: guid.h:75